Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/confs/train/config.json --data_path /home/saxelrod/synthetic/confs/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/confs/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/confs/train_full.csv',
 'dataset_type': 'regression',
 'depth': 4,
 'device': device(type='cuda', index=1),
 'dropout': 0.0,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 400,
 'ffn_num_layers': 2,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 400,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/confs/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/confs/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/confs/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['log_uniqueconfs'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': False,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 9.9084e-01, PNorm = 38.3707, GNorm = 3.1325, lr_0 = 1.0413e-04
Loss = 8.8513e-01, PNorm = 38.3732, GNorm = 6.7689, lr_0 = 1.0788e-04
Loss = 8.1232e-01, PNorm = 38.3760, GNorm = 6.4582, lr_0 = 1.1163e-04
Loss = 7.8212e-01, PNorm = 38.3781, GNorm = 8.8467, lr_0 = 1.1537e-04
Loss = 7.3412e-01, PNorm = 38.3809, GNorm = 5.9024, lr_0 = 1.1913e-04
Loss = 6.0069e-01, PNorm = 38.3848, GNorm = 3.1363, lr_0 = 1.2287e-04
Loss = 6.5862e-01, PNorm = 38.3887, GNorm = 2.7361, lr_0 = 1.2663e-04
Loss = 5.8548e-01, PNorm = 38.3928, GNorm = 4.0664, lr_0 = 1.3038e-04
Loss = 6.9170e-01, PNorm = 38.3975, GNorm = 10.4434, lr_0 = 1.3413e-04
Loss = 5.3450e-01, PNorm = 38.4030, GNorm = 4.6656, lr_0 = 1.3788e-04
Loss = 5.5670e-01, PNorm = 38.4094, GNorm = 8.9884, lr_0 = 1.4163e-04
Loss = 5.9132e-01, PNorm = 38.4156, GNorm = 13.7912, lr_0 = 1.4537e-04
Loss = 4.9564e-01, PNorm = 38.4206, GNorm = 4.9332, lr_0 = 1.4913e-04
Loss = 5.3476e-01, PNorm = 38.4251, GNorm = 13.0832, lr_0 = 1.5288e-04
Loss = 5.9097e-01, PNorm = 38.4291, GNorm = 2.5007, lr_0 = 1.5662e-04
Loss = 5.8734e-01, PNorm = 38.4343, GNorm = 4.8673, lr_0 = 1.6038e-04
Loss = 5.1490e-01, PNorm = 38.4397, GNorm = 9.1547, lr_0 = 1.6412e-04
Loss = 4.4765e-01, PNorm = 38.4472, GNorm = 6.4944, lr_0 = 1.6788e-04
Loss = 3.8240e-01, PNorm = 38.4543, GNorm = 1.9852, lr_0 = 1.7163e-04
Loss = 4.5122e-01, PNorm = 38.4605, GNorm = 12.4662, lr_0 = 1.7538e-04
Loss = 4.2690e-01, PNorm = 38.4664, GNorm = 22.0253, lr_0 = 1.7913e-04
Loss = 4.7000e-01, PNorm = 38.4713, GNorm = 5.8695, lr_0 = 1.8288e-04
Loss = 4.3509e-01, PNorm = 38.4752, GNorm = 11.0089, lr_0 = 1.8662e-04
Loss = 3.9208e-01, PNorm = 38.4808, GNorm = 4.6531, lr_0 = 1.9038e-04
Loss = 4.5113e-01, PNorm = 38.4852, GNorm = 26.7654, lr_0 = 1.9413e-04
Loss = 3.9428e-01, PNorm = 38.4875, GNorm = 10.1059, lr_0 = 1.9788e-04
Loss = 5.1956e-01, PNorm = 38.4922, GNorm = 2.0981, lr_0 = 2.0163e-04
Loss = 4.4374e-01, PNorm = 38.4971, GNorm = 7.0757, lr_0 = 2.0537e-04
Loss = 4.0350e-01, PNorm = 38.5033, GNorm = 5.8859, lr_0 = 2.0913e-04
Loss = 3.8427e-01, PNorm = 38.5110, GNorm = 4.3354, lr_0 = 2.1288e-04
Loss = 3.6635e-01, PNorm = 38.5173, GNorm = 5.7555, lr_0 = 2.1663e-04
Loss = 3.2881e-01, PNorm = 38.5243, GNorm = 4.0136, lr_0 = 2.2038e-04
Loss = 4.2388e-01, PNorm = 38.5283, GNorm = 21.1551, lr_0 = 2.2412e-04
Loss = 3.6874e-01, PNorm = 38.5321, GNorm = 7.0727, lr_0 = 2.2787e-04
Loss = 3.4555e-01, PNorm = 38.5379, GNorm = 3.8800, lr_0 = 2.3163e-04
Loss = 3.2999e-01, PNorm = 38.5437, GNorm = 9.0815, lr_0 = 2.3538e-04
Loss = 3.7263e-01, PNorm = 38.5491, GNorm = 11.5183, lr_0 = 2.3913e-04
Loss = 3.4157e-01, PNorm = 38.5547, GNorm = 1.9225, lr_0 = 2.4288e-04
Loss = 3.1831e-01, PNorm = 38.5607, GNorm = 18.4879, lr_0 = 2.4662e-04
Loss = 3.9300e-01, PNorm = 38.5663, GNorm = 7.3632, lr_0 = 2.5038e-04
Loss = 4.0697e-01, PNorm = 38.5702, GNorm = 2.9728, lr_0 = 2.5413e-04
Loss = 3.6911e-01, PNorm = 38.5766, GNorm = 7.4105, lr_0 = 2.5788e-04
Loss = 3.4700e-01, PNorm = 38.5842, GNorm = 5.9601, lr_0 = 2.6163e-04
Loss = 2.9173e-01, PNorm = 38.5913, GNorm = 12.5047, lr_0 = 2.6537e-04
Loss = 3.4772e-01, PNorm = 38.5982, GNorm = 13.3107, lr_0 = 2.6912e-04
Loss = 3.6488e-01, PNorm = 38.6046, GNorm = 6.2039, lr_0 = 2.7288e-04
Loss = 3.8070e-01, PNorm = 38.6102, GNorm = 9.0423, lr_0 = 2.7663e-04
Loss = 3.9683e-01, PNorm = 38.6154, GNorm = 13.6498, lr_0 = 2.8038e-04
Loss = 4.1413e-01, PNorm = 38.6202, GNorm = 3.0324, lr_0 = 2.8413e-04
Loss = 3.3793e-01, PNorm = 38.6279, GNorm = 5.1047, lr_0 = 2.8787e-04
Loss = 3.8610e-01, PNorm = 38.6365, GNorm = 6.4472, lr_0 = 2.9163e-04
Loss = 3.2214e-01, PNorm = 38.6460, GNorm = 15.7283, lr_0 = 2.9538e-04
Loss = 3.5294e-01, PNorm = 38.6539, GNorm = 1.8292, lr_0 = 2.9913e-04
Loss = 3.7946e-01, PNorm = 38.6641, GNorm = 5.8763, lr_0 = 3.0288e-04
Loss = 3.9604e-01, PNorm = 38.6705, GNorm = 3.8968, lr_0 = 3.0662e-04
Loss = 3.2069e-01, PNorm = 38.6771, GNorm = 8.7374, lr_0 = 3.1037e-04
Loss = 3.4915e-01, PNorm = 38.6844, GNorm = 5.0194, lr_0 = 3.1413e-04
Loss = 3.1670e-01, PNorm = 38.6941, GNorm = 4.2459, lr_0 = 3.1788e-04
Loss = 3.5230e-01, PNorm = 38.6987, GNorm = 3.9494, lr_0 = 3.2163e-04
Loss = 2.8879e-01, PNorm = 38.7061, GNorm = 3.7160, lr_0 = 3.2538e-04
Loss = 3.1268e-01, PNorm = 38.7126, GNorm = 7.7535, lr_0 = 3.2912e-04
Loss = 3.0432e-01, PNorm = 38.7188, GNorm = 11.8189, lr_0 = 3.3288e-04
Loss = 2.7474e-01, PNorm = 38.7255, GNorm = 2.6137, lr_0 = 3.3663e-04
Loss = 3.1276e-01, PNorm = 38.7323, GNorm = 6.9616, lr_0 = 3.4038e-04
Loss = 2.9851e-01, PNorm = 38.7387, GNorm = 10.6459, lr_0 = 3.4413e-04
Loss = 3.1915e-01, PNorm = 38.7462, GNorm = 8.7341, lr_0 = 3.4787e-04
Loss = 3.7908e-01, PNorm = 38.7519, GNorm = 14.2961, lr_0 = 3.5162e-04
Loss = 3.0230e-01, PNorm = 38.7594, GNorm = 6.1571, lr_0 = 3.5538e-04
Loss = 3.2887e-01, PNorm = 38.7682, GNorm = 5.3797, lr_0 = 3.5913e-04
Loss = 3.3508e-01, PNorm = 38.7733, GNorm = 1.9332, lr_0 = 3.6288e-04
Loss = 2.6514e-01, PNorm = 38.7798, GNorm = 1.6469, lr_0 = 3.6662e-04
Loss = 2.9309e-01, PNorm = 38.7878, GNorm = 26.9916, lr_0 = 3.7037e-04
Loss = 3.3565e-01, PNorm = 38.7916, GNorm = 3.9701, lr_0 = 3.7413e-04
Loss = 2.9648e-01, PNorm = 38.7998, GNorm = 7.0040, lr_0 = 3.7788e-04
Loss = 2.8479e-01, PNorm = 38.8101, GNorm = 16.6936, lr_0 = 3.8163e-04
Loss = 2.8803e-01, PNorm = 38.8175, GNorm = 4.6682, lr_0 = 3.8537e-04
Loss = 3.0331e-01, PNorm = 38.8278, GNorm = 1.8538, lr_0 = 3.8912e-04
Loss = 2.9384e-01, PNorm = 38.8405, GNorm = 5.8983, lr_0 = 3.9287e-04
Loss = 3.1338e-01, PNorm = 38.8465, GNorm = 2.6043, lr_0 = 3.9663e-04
Loss = 3.8633e-01, PNorm = 38.8533, GNorm = 4.5454, lr_0 = 4.0038e-04
Loss = 5.2218e-01, PNorm = 38.8633, GNorm = 13.4313, lr_0 = 4.0413e-04
Loss = 4.4384e-01, PNorm = 38.8758, GNorm = 6.4781, lr_0 = 4.0787e-04
Loss = 3.3458e-01, PNorm = 38.8899, GNorm = 6.3255, lr_0 = 4.1162e-04
Loss = 2.8784e-01, PNorm = 38.9041, GNorm = 4.2736, lr_0 = 4.1537e-04
Loss = 2.6358e-01, PNorm = 38.9151, GNorm = 5.1661, lr_0 = 4.1913e-04
Loss = 3.5671e-01, PNorm = 38.9258, GNorm = 6.6585, lr_0 = 4.2288e-04
Loss = 3.1406e-01, PNorm = 38.9369, GNorm = 2.2470, lr_0 = 4.2662e-04
Loss = 3.3181e-01, PNorm = 38.9502, GNorm = 10.9479, lr_0 = 4.3037e-04
Loss = 2.5948e-01, PNorm = 38.9555, GNorm = 1.5632, lr_0 = 4.3412e-04
Loss = 2.8499e-01, PNorm = 38.9594, GNorm = 15.7147, lr_0 = 4.3788e-04
Loss = 3.2144e-01, PNorm = 38.9687, GNorm = 1.1434, lr_0 = 4.4163e-04
Loss = 2.6865e-01, PNorm = 38.9801, GNorm = 8.2426, lr_0 = 4.4538e-04
Loss = 2.8636e-01, PNorm = 38.9861, GNorm = 4.2889, lr_0 = 4.4912e-04
Loss = 3.0205e-01, PNorm = 38.9906, GNorm = 10.9927, lr_0 = 4.5287e-04
Loss = 2.8763e-01, PNorm = 38.9970, GNorm = 11.1635, lr_0 = 4.5662e-04
Loss = 2.7257e-01, PNorm = 39.0078, GNorm = 1.8732, lr_0 = 4.6038e-04
Loss = 2.7085e-01, PNorm = 39.0178, GNorm = 7.0618, lr_0 = 4.6413e-04
Loss = 3.1499e-01, PNorm = 39.0246, GNorm = 7.6440, lr_0 = 4.6787e-04
Loss = 2.8553e-01, PNorm = 39.0343, GNorm = 5.6048, lr_0 = 4.7162e-04
Loss = 3.1837e-01, PNorm = 39.0428, GNorm = 3.9108, lr_0 = 4.7537e-04
Loss = 2.9820e-01, PNorm = 39.0558, GNorm = 1.6267, lr_0 = 4.7913e-04
Loss = 2.7809e-01, PNorm = 39.0747, GNorm = 8.3081, lr_0 = 4.8288e-04
Loss = 2.7071e-01, PNorm = 39.0825, GNorm = 4.9358, lr_0 = 4.8663e-04
Loss = 2.6103e-01, PNorm = 39.0894, GNorm = 3.8795, lr_0 = 4.9038e-04
Loss = 2.6940e-01, PNorm = 39.0996, GNorm = 6.2750, lr_0 = 4.9412e-04
Loss = 3.4020e-01, PNorm = 39.1035, GNorm = 11.4056, lr_0 = 4.9788e-04
Loss = 3.3009e-01, PNorm = 39.1063, GNorm = 5.4776, lr_0 = 5.0163e-04
Loss = 3.4842e-01, PNorm = 39.1162, GNorm = 8.8531, lr_0 = 5.0538e-04
Loss = 3.1660e-01, PNorm = 39.1348, GNorm = 9.3842, lr_0 = 5.0913e-04
Loss = 2.6261e-01, PNorm = 39.1520, GNorm = 3.7656, lr_0 = 5.1287e-04
Loss = 3.3698e-01, PNorm = 39.1635, GNorm = 5.9081, lr_0 = 5.1663e-04
Loss = 3.1487e-01, PNorm = 39.1779, GNorm = 5.5165, lr_0 = 5.2038e-04
Loss = 2.8384e-01, PNorm = 39.1923, GNorm = 1.0861, lr_0 = 5.2413e-04
Loss = 2.7051e-01, PNorm = 39.2014, GNorm = 1.6449, lr_0 = 5.2788e-04
Loss = 2.7504e-01, PNorm = 39.2132, GNorm = 6.7174, lr_0 = 5.3162e-04
Loss = 3.0262e-01, PNorm = 39.2239, GNorm = 9.2317, lr_0 = 5.3538e-04
Loss = 2.7514e-01, PNorm = 39.2374, GNorm = 4.7749, lr_0 = 5.3912e-04
Loss = 3.0865e-01, PNorm = 39.2543, GNorm = 8.1270, lr_0 = 5.4288e-04
Loss = 2.8486e-01, PNorm = 39.2656, GNorm = 2.9948, lr_0 = 5.4663e-04
Loss = 2.9068e-01, PNorm = 39.2781, GNorm = 6.8641, lr_0 = 5.5038e-04
Validation mae = 0.589248
Epoch 1
Loss = 2.7475e-01, PNorm = 39.2895, GNorm = 4.5757, lr_0 = 5.5413e-04
Loss = 2.6708e-01, PNorm = 39.3055, GNorm = 3.6751, lr_0 = 5.5787e-04
Loss = 2.8375e-01, PNorm = 39.3166, GNorm = 4.2380, lr_0 = 5.6163e-04
Loss = 2.5311e-01, PNorm = 39.3291, GNorm = 4.3140, lr_0 = 5.6538e-04
Loss = 3.4166e-01, PNorm = 39.3422, GNorm = 3.5153, lr_0 = 5.6913e-04
Loss = 2.9194e-01, PNorm = 39.3578, GNorm = 2.8987, lr_0 = 5.7288e-04
Loss = 2.3713e-01, PNorm = 39.3729, GNorm = 1.3594, lr_0 = 5.7662e-04
Loss = 2.6604e-01, PNorm = 39.3888, GNorm = 7.1062, lr_0 = 5.8038e-04
Loss = 2.6039e-01, PNorm = 39.4090, GNorm = 3.8420, lr_0 = 5.8413e-04
Loss = 2.9840e-01, PNorm = 39.4247, GNorm = 5.1594, lr_0 = 5.8788e-04
Loss = 2.1939e-01, PNorm = 39.4390, GNorm = 7.4985, lr_0 = 5.9163e-04
Loss = 2.3364e-01, PNorm = 39.4550, GNorm = 1.0188, lr_0 = 5.9538e-04
Loss = 2.8927e-01, PNorm = 39.4670, GNorm = 15.1636, lr_0 = 5.9913e-04
Loss = 2.6218e-01, PNorm = 39.4823, GNorm = 1.4069, lr_0 = 6.0288e-04
Loss = 3.1382e-01, PNorm = 39.5055, GNorm = 7.1144, lr_0 = 6.0663e-04
Loss = 2.9671e-01, PNorm = 39.5257, GNorm = 0.8989, lr_0 = 6.1038e-04
Loss = 2.7212e-01, PNorm = 39.5443, GNorm = 6.5449, lr_0 = 6.1413e-04
Loss = 2.7783e-01, PNorm = 39.5596, GNorm = 5.5697, lr_0 = 6.1788e-04
Loss = 2.1496e-01, PNorm = 39.5769, GNorm = 0.7846, lr_0 = 6.2163e-04
Loss = 2.5897e-01, PNorm = 39.5928, GNorm = 1.8551, lr_0 = 6.2538e-04
Loss = 2.8348e-01, PNorm = 39.6030, GNorm = 6.9380, lr_0 = 6.2913e-04
Loss = 2.7921e-01, PNorm = 39.6141, GNorm = 5.5051, lr_0 = 6.3288e-04
Loss = 2.7396e-01, PNorm = 39.6277, GNorm = 8.2963, lr_0 = 6.3663e-04
Loss = 2.7183e-01, PNorm = 39.6457, GNorm = 7.1681, lr_0 = 6.4038e-04
Loss = 2.3840e-01, PNorm = 39.6661, GNorm = 5.0788, lr_0 = 6.4413e-04
Loss = 2.3300e-01, PNorm = 39.6822, GNorm = 1.7878, lr_0 = 6.4788e-04
Loss = 2.4292e-01, PNorm = 39.6939, GNorm = 4.1704, lr_0 = 6.5163e-04
Loss = 2.5094e-01, PNorm = 39.7108, GNorm = 4.4476, lr_0 = 6.5538e-04
Loss = 2.4040e-01, PNorm = 39.7232, GNorm = 1.2641, lr_0 = 6.5913e-04
Loss = 2.4251e-01, PNorm = 39.7336, GNorm = 5.9371, lr_0 = 6.6288e-04
Loss = 2.6300e-01, PNorm = 39.7512, GNorm = 1.7487, lr_0 = 6.6663e-04
Loss = 2.5776e-01, PNorm = 39.7616, GNorm = 2.8637, lr_0 = 6.7038e-04
Loss = 2.9680e-01, PNorm = 39.7785, GNorm = 6.4089, lr_0 = 6.7413e-04
Loss = 2.2671e-01, PNorm = 39.8027, GNorm = 3.7755, lr_0 = 6.7788e-04
Loss = 2.5687e-01, PNorm = 39.8188, GNorm = 9.6528, lr_0 = 6.8163e-04
Loss = 3.5089e-01, PNorm = 39.8375, GNorm = 1.4286, lr_0 = 6.8538e-04
Loss = 3.7964e-01, PNorm = 39.8575, GNorm = 4.8209, lr_0 = 6.8913e-04
Loss = 3.7153e-01, PNorm = 39.8928, GNorm = 5.2115, lr_0 = 6.9288e-04
Loss = 3.0715e-01, PNorm = 39.9226, GNorm = 3.5457, lr_0 = 6.9663e-04
Loss = 2.4274e-01, PNorm = 39.9425, GNorm = 1.0250, lr_0 = 7.0038e-04
Loss = 2.6070e-01, PNorm = 39.9618, GNorm = 1.0279, lr_0 = 7.0413e-04
Loss = 2.7073e-01, PNorm = 39.9831, GNorm = 2.2980, lr_0 = 7.0788e-04
Loss = 2.6636e-01, PNorm = 40.0022, GNorm = 1.8839, lr_0 = 7.1163e-04
Loss = 2.3504e-01, PNorm = 40.0203, GNorm = 3.0057, lr_0 = 7.1538e-04
Loss = 2.2692e-01, PNorm = 40.0388, GNorm = 1.0546, lr_0 = 7.1913e-04
Loss = 2.3055e-01, PNorm = 40.0543, GNorm = 7.3761, lr_0 = 7.2288e-04
Loss = 2.6694e-01, PNorm = 40.0709, GNorm = 9.8002, lr_0 = 7.2663e-04
Loss = 3.1034e-01, PNorm = 40.0868, GNorm = 9.8175, lr_0 = 7.3038e-04
Loss = 2.4918e-01, PNorm = 40.1098, GNorm = 3.2949, lr_0 = 7.3413e-04
Loss = 2.2884e-01, PNorm = 40.1314, GNorm = 3.7932, lr_0 = 7.3788e-04
Loss = 2.2612e-01, PNorm = 40.1578, GNorm = 1.5585, lr_0 = 7.4163e-04
Loss = 2.5281e-01, PNorm = 40.1746, GNorm = 2.8931, lr_0 = 7.4538e-04
Loss = 2.5897e-01, PNorm = 40.1878, GNorm = 5.0624, lr_0 = 7.4913e-04
Loss = 2.1708e-01, PNorm = 40.2059, GNorm = 1.4425, lr_0 = 7.5288e-04
Loss = 2.7455e-01, PNorm = 40.2278, GNorm = 7.5249, lr_0 = 7.5663e-04
Loss = 2.6565e-01, PNorm = 40.2567, GNorm = 2.6225, lr_0 = 7.6038e-04
Loss = 2.5505e-01, PNorm = 40.2814, GNorm = 3.1812, lr_0 = 7.6413e-04
Loss = 2.3615e-01, PNorm = 40.3062, GNorm = 1.0574, lr_0 = 7.6788e-04
Loss = 2.2121e-01, PNorm = 40.3275, GNorm = 6.7156, lr_0 = 7.7163e-04
Loss = 2.2860e-01, PNorm = 40.3461, GNorm = 2.5213, lr_0 = 7.7538e-04
Loss = 2.3181e-01, PNorm = 40.3570, GNorm = 1.1170, lr_0 = 7.7913e-04
Loss = 2.9973e-01, PNorm = 40.3793, GNorm = 8.7599, lr_0 = 7.8288e-04
Loss = 2.7621e-01, PNorm = 40.4059, GNorm = 9.0637, lr_0 = 7.8663e-04
Loss = 2.0864e-01, PNorm = 40.4284, GNorm = 2.3625, lr_0 = 7.9038e-04
Loss = 2.2399e-01, PNorm = 40.4546, GNorm = 3.5045, lr_0 = 7.9413e-04
Loss = 2.3971e-01, PNorm = 40.4794, GNorm = 3.5219, lr_0 = 7.9788e-04
Loss = 2.1408e-01, PNorm = 40.5013, GNorm = 2.2839, lr_0 = 8.0163e-04
Loss = 2.2411e-01, PNorm = 40.5162, GNorm = 1.0861, lr_0 = 8.0538e-04
Loss = 2.7424e-01, PNorm = 40.5459, GNorm = 4.8653, lr_0 = 8.0913e-04
Loss = 2.6270e-01, PNorm = 40.5737, GNorm = 4.9702, lr_0 = 8.1288e-04
Loss = 2.1602e-01, PNorm = 40.5890, GNorm = 5.2563, lr_0 = 8.1663e-04
Loss = 2.1229e-01, PNorm = 40.6043, GNorm = 4.8868, lr_0 = 8.2038e-04
Loss = 2.2860e-01, PNorm = 40.6192, GNorm = 2.9456, lr_0 = 8.2413e-04
Loss = 2.1741e-01, PNorm = 40.6535, GNorm = 0.9680, lr_0 = 8.2788e-04
Loss = 2.6677e-01, PNorm = 40.6754, GNorm = 3.0824, lr_0 = 8.3163e-04
Loss = 2.3294e-01, PNorm = 40.6984, GNorm = 1.8635, lr_0 = 8.3538e-04
Loss = 2.2130e-01, PNorm = 40.7220, GNorm = 3.1419, lr_0 = 8.3913e-04
Loss = 2.3446e-01, PNorm = 40.7356, GNorm = 6.4797, lr_0 = 8.4288e-04
Loss = 2.7067e-01, PNorm = 40.7584, GNorm = 3.1498, lr_0 = 8.4663e-04
Loss = 2.5196e-01, PNorm = 40.7806, GNorm = 6.3990, lr_0 = 8.5038e-04
Loss = 2.2581e-01, PNorm = 40.8114, GNorm = 2.3048, lr_0 = 8.5413e-04
Loss = 2.1762e-01, PNorm = 40.8379, GNorm = 4.0879, lr_0 = 8.5788e-04
Loss = 2.4106e-01, PNorm = 40.8722, GNorm = 1.1213, lr_0 = 8.6163e-04
Loss = 2.0210e-01, PNorm = 40.8983, GNorm = 1.7938, lr_0 = 8.6538e-04
Loss = 2.4525e-01, PNorm = 40.9265, GNorm = 2.1738, lr_0 = 8.6913e-04
Loss = 2.1019e-01, PNorm = 40.9505, GNorm = 3.6494, lr_0 = 8.7288e-04
Loss = 2.2381e-01, PNorm = 40.9746, GNorm = 3.9048, lr_0 = 8.7663e-04
Loss = 1.9268e-01, PNorm = 40.9994, GNorm = 1.4997, lr_0 = 8.8038e-04
Loss = 2.5739e-01, PNorm = 41.0242, GNorm = 1.1592, lr_0 = 8.8413e-04
Loss = 2.2903e-01, PNorm = 41.0460, GNorm = 4.5584, lr_0 = 8.8788e-04
Loss = 2.2995e-01, PNorm = 41.0701, GNorm = 3.9313, lr_0 = 8.9163e-04
Loss = 2.2995e-01, PNorm = 41.0982, GNorm = 1.8881, lr_0 = 8.9538e-04
Loss = 2.4262e-01, PNorm = 41.1289, GNorm = 5.8097, lr_0 = 8.9913e-04
Loss = 1.9110e-01, PNorm = 41.1454, GNorm = 1.7153, lr_0 = 9.0288e-04
Loss = 2.5879e-01, PNorm = 41.1680, GNorm = 2.0163, lr_0 = 9.0663e-04
Loss = 3.0270e-01, PNorm = 41.2004, GNorm = 3.4732, lr_0 = 9.1038e-04
Loss = 2.4969e-01, PNorm = 41.2324, GNorm = 3.1948, lr_0 = 9.1413e-04
Loss = 2.1495e-01, PNorm = 41.2716, GNorm = 5.6328, lr_0 = 9.1788e-04
Loss = 2.4155e-01, PNorm = 41.2977, GNorm = 5.2834, lr_0 = 9.2163e-04
Loss = 2.3611e-01, PNorm = 41.3213, GNorm = 2.6646, lr_0 = 9.2538e-04
Loss = 2.1140e-01, PNorm = 41.3466, GNorm = 1.0481, lr_0 = 9.2913e-04
Loss = 1.8634e-01, PNorm = 41.3680, GNorm = 2.1043, lr_0 = 9.3288e-04
Loss = 2.3020e-01, PNorm = 41.3924, GNorm = 1.4384, lr_0 = 9.3663e-04
Loss = 2.3818e-01, PNorm = 41.4166, GNorm = 4.6470, lr_0 = 9.4038e-04
Loss = 2.4332e-01, PNorm = 41.4646, GNorm = 3.7538, lr_0 = 9.4413e-04
Loss = 2.2102e-01, PNorm = 41.4926, GNorm = 2.0562, lr_0 = 9.4788e-04
Loss = 2.3149e-01, PNorm = 41.5254, GNorm = 1.8805, lr_0 = 9.5163e-04
Loss = 2.1214e-01, PNorm = 41.5371, GNorm = 1.0147, lr_0 = 9.5538e-04
Loss = 2.0581e-01, PNorm = 41.5608, GNorm = 0.7917, lr_0 = 9.5913e-04
Loss = 2.2585e-01, PNorm = 41.5872, GNorm = 5.6262, lr_0 = 9.6288e-04
Loss = 2.8718e-01, PNorm = 41.6163, GNorm = 5.6172, lr_0 = 9.6663e-04
Loss = 2.2827e-01, PNorm = 41.6542, GNorm = 4.1890, lr_0 = 9.7038e-04
Loss = 2.4150e-01, PNorm = 41.6902, GNorm = 6.5379, lr_0 = 9.7413e-04
Loss = 2.2370e-01, PNorm = 41.7303, GNorm = 2.1168, lr_0 = 9.7788e-04
Loss = 2.3952e-01, PNorm = 41.7617, GNorm = 3.2855, lr_0 = 9.8163e-04
Loss = 2.2138e-01, PNorm = 41.7936, GNorm = 3.7677, lr_0 = 9.8537e-04
Loss = 2.5589e-01, PNorm = 41.8129, GNorm = 2.3099, lr_0 = 9.8912e-04
Loss = 2.3072e-01, PNorm = 41.8414, GNorm = 1.6658, lr_0 = 9.9288e-04
Loss = 2.0812e-01, PNorm = 41.8666, GNorm = 2.2512, lr_0 = 9.9663e-04
Loss = 2.3110e-01, PNorm = 41.8996, GNorm = 5.4905, lr_0 = 9.9993e-04
Validation mae = 0.531877
Epoch 2
Loss = 2.6922e-01, PNorm = 41.9280, GNorm = 5.0826, lr_0 = 9.9925e-04
Loss = 2.3182e-01, PNorm = 41.9593, GNorm = 4.7776, lr_0 = 9.9856e-04
Loss = 2.4465e-01, PNorm = 41.9983, GNorm = 2.0248, lr_0 = 9.9788e-04
Loss = 2.5973e-01, PNorm = 42.0370, GNorm = 1.4518, lr_0 = 9.9719e-04
Loss = 2.0175e-01, PNorm = 42.0770, GNorm = 3.8128, lr_0 = 9.9651e-04
Loss = 2.0897e-01, PNorm = 42.1039, GNorm = 2.0547, lr_0 = 9.9583e-04
Loss = 2.5569e-01, PNorm = 42.1309, GNorm = 3.5981, lr_0 = 9.9515e-04
Loss = 2.3107e-01, PNorm = 42.1587, GNorm = 2.5442, lr_0 = 9.9446e-04
Loss = 1.9576e-01, PNorm = 42.1858, GNorm = 1.8351, lr_0 = 9.9378e-04
Loss = 1.9560e-01, PNorm = 42.2104, GNorm = 2.6879, lr_0 = 9.9310e-04
Loss = 1.9977e-01, PNorm = 42.2388, GNorm = 1.2989, lr_0 = 9.9242e-04
Loss = 2.2089e-01, PNorm = 42.2664, GNorm = 7.1370, lr_0 = 9.9174e-04
Loss = 2.3825e-01, PNorm = 42.2947, GNorm = 5.7617, lr_0 = 9.9106e-04
Loss = 2.2768e-01, PNorm = 42.3372, GNorm = 3.3525, lr_0 = 9.9038e-04
Loss = 2.1721e-01, PNorm = 42.3757, GNorm = 4.1001, lr_0 = 9.8971e-04
Loss = 1.8301e-01, PNorm = 42.4086, GNorm = 1.9252, lr_0 = 9.8903e-04
Loss = 2.1168e-01, PNorm = 42.4318, GNorm = 2.8749, lr_0 = 9.8835e-04
Loss = 1.9947e-01, PNorm = 42.4544, GNorm = 2.7299, lr_0 = 9.8767e-04
Loss = 2.2032e-01, PNorm = 42.4800, GNorm = 3.5046, lr_0 = 9.8700e-04
Loss = 2.4084e-01, PNorm = 42.5126, GNorm = 1.6595, lr_0 = 9.8632e-04
Loss = 2.0916e-01, PNorm = 42.5453, GNorm = 1.7241, lr_0 = 9.8564e-04
Loss = 1.9111e-01, PNorm = 42.5762, GNorm = 1.3471, lr_0 = 9.8497e-04
Loss = 1.9040e-01, PNorm = 42.6036, GNorm = 1.5089, lr_0 = 9.8429e-04
Loss = 1.8644e-01, PNorm = 42.6236, GNorm = 0.8259, lr_0 = 9.8362e-04
Loss = 1.9704e-01, PNorm = 42.6470, GNorm = 3.3098, lr_0 = 9.8295e-04
Loss = 2.0753e-01, PNorm = 42.6717, GNorm = 4.2942, lr_0 = 9.8227e-04
Loss = 2.0040e-01, PNorm = 42.7005, GNorm = 3.2908, lr_0 = 9.8160e-04
Loss = 2.4266e-01, PNorm = 42.7265, GNorm = 0.5075, lr_0 = 9.8093e-04
Loss = 1.9514e-01, PNorm = 42.7531, GNorm = 3.3742, lr_0 = 9.8026e-04
Loss = 2.2490e-01, PNorm = 42.7850, GNorm = 0.7909, lr_0 = 9.7958e-04
Loss = 1.9396e-01, PNorm = 42.8138, GNorm = 0.8335, lr_0 = 9.7891e-04
Loss = 1.9434e-01, PNorm = 42.8414, GNorm = 3.8504, lr_0 = 9.7824e-04
Loss = 1.9669e-01, PNorm = 42.8678, GNorm = 1.4418, lr_0 = 9.7757e-04
Loss = 1.9757e-01, PNorm = 42.8972, GNorm = 0.6930, lr_0 = 9.7690e-04
Loss = 1.8348e-01, PNorm = 42.9261, GNorm = 1.3176, lr_0 = 9.7623e-04
Loss = 1.8715e-01, PNorm = 42.9386, GNorm = 2.1158, lr_0 = 9.7556e-04
Loss = 1.6945e-01, PNorm = 42.9601, GNorm = 1.5527, lr_0 = 9.7490e-04
Loss = 1.9464e-01, PNorm = 42.9811, GNorm = 0.7621, lr_0 = 9.7423e-04
Loss = 1.9394e-01, PNorm = 42.9970, GNorm = 1.4750, lr_0 = 9.7356e-04
Loss = 1.7268e-01, PNorm = 43.0210, GNorm = 1.3789, lr_0 = 9.7289e-04
Loss = 1.9499e-01, PNorm = 43.0446, GNorm = 3.2073, lr_0 = 9.7223e-04
Loss = 1.9665e-01, PNorm = 43.0607, GNorm = 1.3729, lr_0 = 9.7156e-04
Loss = 1.6916e-01, PNorm = 43.0817, GNorm = 1.3817, lr_0 = 9.7090e-04
Loss = 1.6997e-01, PNorm = 43.1036, GNorm = 1.3881, lr_0 = 9.7023e-04
Loss = 2.1515e-01, PNorm = 43.1267, GNorm = 2.0264, lr_0 = 9.6957e-04
Loss = 2.1625e-01, PNorm = 43.1528, GNorm = 3.1298, lr_0 = 9.6890e-04
Loss = 2.2100e-01, PNorm = 43.1814, GNorm = 3.3369, lr_0 = 9.6824e-04
Loss = 2.0437e-01, PNorm = 43.1996, GNorm = 1.0766, lr_0 = 9.6757e-04
Loss = 2.2392e-01, PNorm = 43.2284, GNorm = 2.1998, lr_0 = 9.6691e-04
Loss = 1.9035e-01, PNorm = 43.2560, GNorm = 0.7974, lr_0 = 9.6625e-04
Loss = 1.9452e-01, PNorm = 43.2799, GNorm = 1.8880, lr_0 = 9.6559e-04
Loss = 2.2500e-01, PNorm = 43.2985, GNorm = 2.9478, lr_0 = 9.6493e-04
Loss = 2.0827e-01, PNorm = 43.3230, GNorm = 3.7735, lr_0 = 9.6427e-04
Loss = 1.9745e-01, PNorm = 43.3559, GNorm = 2.5166, lr_0 = 9.6360e-04
Loss = 1.7575e-01, PNorm = 43.3812, GNorm = 2.6277, lr_0 = 9.6294e-04
Loss = 1.8271e-01, PNorm = 43.4026, GNorm = 2.6705, lr_0 = 9.6228e-04
Loss = 2.0798e-01, PNorm = 43.4295, GNorm = 4.0902, lr_0 = 9.6163e-04
Loss = 2.0888e-01, PNorm = 43.4735, GNorm = 2.6815, lr_0 = 9.6097e-04
Loss = 2.3196e-01, PNorm = 43.5010, GNorm = 2.6138, lr_0 = 9.6031e-04
Loss = 2.1298e-01, PNorm = 43.5306, GNorm = 2.5121, lr_0 = 9.5965e-04
Loss = 2.0245e-01, PNorm = 43.5597, GNorm = 2.2089, lr_0 = 9.5899e-04
Loss = 1.9234e-01, PNorm = 43.5852, GNorm = 0.8714, lr_0 = 9.5834e-04
Loss = 2.3363e-01, PNorm = 43.6099, GNorm = 2.4840, lr_0 = 9.5768e-04
Loss = 2.1802e-01, PNorm = 43.6511, GNorm = 3.1122, lr_0 = 9.5702e-04
Loss = 2.4099e-01, PNorm = 43.6890, GNorm = 2.2571, lr_0 = 9.5637e-04
Loss = 2.1250e-01, PNorm = 43.7276, GNorm = 1.5599, lr_0 = 9.5571e-04
Loss = 2.1202e-01, PNorm = 43.7648, GNorm = 1.7808, lr_0 = 9.5506e-04
Loss = 2.0649e-01, PNorm = 43.7921, GNorm = 0.8124, lr_0 = 9.5440e-04
Loss = 1.7497e-01, PNorm = 43.8170, GNorm = 1.3279, lr_0 = 9.5375e-04
Loss = 1.7909e-01, PNorm = 43.8391, GNorm = 1.2072, lr_0 = 9.5310e-04
Loss = 2.1090e-01, PNorm = 43.8595, GNorm = 0.9767, lr_0 = 9.5244e-04
Loss = 2.0007e-01, PNorm = 43.8754, GNorm = 0.9686, lr_0 = 9.5179e-04
Loss = 2.2143e-01, PNorm = 43.8995, GNorm = 4.5780, lr_0 = 9.5114e-04
Loss = 2.1271e-01, PNorm = 43.9303, GNorm = 2.5566, lr_0 = 9.5049e-04
Loss = 2.1620e-01, PNorm = 43.9603, GNorm = 1.7631, lr_0 = 9.4984e-04
Loss = 1.8695e-01, PNorm = 43.9776, GNorm = 1.0141, lr_0 = 9.4919e-04
Loss = 1.8752e-01, PNorm = 43.9921, GNorm = 1.5374, lr_0 = 9.4854e-04
Loss = 2.0703e-01, PNorm = 44.0211, GNorm = 2.6383, lr_0 = 9.4789e-04
Loss = 1.9726e-01, PNorm = 44.0391, GNorm = 1.3019, lr_0 = 9.4724e-04
Loss = 2.2788e-01, PNorm = 44.0583, GNorm = 1.6367, lr_0 = 9.4659e-04
Loss = 1.9974e-01, PNorm = 44.0877, GNorm = 1.2312, lr_0 = 9.4594e-04
Loss = 2.0292e-01, PNorm = 44.1171, GNorm = 2.8288, lr_0 = 9.4529e-04
Loss = 2.2201e-01, PNorm = 44.1396, GNorm = 0.7500, lr_0 = 9.4464e-04
Loss = 2.0799e-01, PNorm = 44.1676, GNorm = 1.4588, lr_0 = 9.4400e-04
Loss = 1.7287e-01, PNorm = 44.1958, GNorm = 2.0876, lr_0 = 9.4335e-04
Loss = 2.0326e-01, PNorm = 44.2105, GNorm = 0.6679, lr_0 = 9.4270e-04
Loss = 2.0391e-01, PNorm = 44.2333, GNorm = 3.3118, lr_0 = 9.4206e-04
Loss = 1.8928e-01, PNorm = 44.2553, GNorm = 2.7033, lr_0 = 9.4141e-04
Loss = 1.8431e-01, PNorm = 44.2859, GNorm = 1.2817, lr_0 = 9.4077e-04
Loss = 1.5712e-01, PNorm = 44.3147, GNorm = 0.9755, lr_0 = 9.4012e-04
Loss = 1.8899e-01, PNorm = 44.3393, GNorm = 1.3900, lr_0 = 9.3948e-04
Loss = 1.8317e-01, PNorm = 44.3533, GNorm = 2.7745, lr_0 = 9.3884e-04
Loss = 1.9124e-01, PNorm = 44.3771, GNorm = 0.9346, lr_0 = 9.3819e-04
Loss = 1.8116e-01, PNorm = 44.4023, GNorm = 0.7856, lr_0 = 9.3755e-04
Loss = 1.9638e-01, PNorm = 44.4255, GNorm = 2.0506, lr_0 = 9.3691e-04
Loss = 1.8428e-01, PNorm = 44.4451, GNorm = 1.1025, lr_0 = 9.3627e-04
Loss = 1.8955e-01, PNorm = 44.4708, GNorm = 2.8023, lr_0 = 9.3562e-04
Loss = 1.7788e-01, PNorm = 44.4926, GNorm = 0.9188, lr_0 = 9.3498e-04
Loss = 1.8671e-01, PNorm = 44.5194, GNorm = 1.5820, lr_0 = 9.3434e-04
Loss = 1.6206e-01, PNorm = 44.5502, GNorm = 3.1803, lr_0 = 9.3370e-04
Loss = 1.7602e-01, PNorm = 44.5680, GNorm = 3.2446, lr_0 = 9.3306e-04
Loss = 1.8259e-01, PNorm = 44.5877, GNorm = 1.1252, lr_0 = 9.3242e-04
Loss = 1.7242e-01, PNorm = 44.6162, GNorm = 3.7256, lr_0 = 9.3178e-04
Loss = 2.0442e-01, PNorm = 44.6438, GNorm = 0.8283, lr_0 = 9.3115e-04
Loss = 2.0943e-01, PNorm = 44.6769, GNorm = 0.8850, lr_0 = 9.3051e-04
Loss = 1.8837e-01, PNorm = 44.7044, GNorm = 2.1141, lr_0 = 9.2987e-04
Loss = 1.6978e-01, PNorm = 44.7247, GNorm = 0.7861, lr_0 = 9.2923e-04
Loss = 1.8499e-01, PNorm = 44.7463, GNorm = 1.8503, lr_0 = 9.2860e-04
Loss = 1.8937e-01, PNorm = 44.7564, GNorm = 4.6410, lr_0 = 9.2796e-04
Loss = 2.1580e-01, PNorm = 44.7730, GNorm = 3.2991, lr_0 = 9.2733e-04
Loss = 1.9737e-01, PNorm = 44.8035, GNorm = 4.1547, lr_0 = 9.2669e-04
Loss = 1.7045e-01, PNorm = 44.8295, GNorm = 1.1442, lr_0 = 9.2606e-04
Loss = 1.7978e-01, PNorm = 44.8570, GNorm = 1.3651, lr_0 = 9.2542e-04
Loss = 1.7665e-01, PNorm = 44.8720, GNorm = 1.2997, lr_0 = 9.2479e-04
Loss = 2.0269e-01, PNorm = 44.8972, GNorm = 3.1030, lr_0 = 9.2415e-04
Loss = 1.9374e-01, PNorm = 44.9202, GNorm = 1.0155, lr_0 = 9.2352e-04
Loss = 1.7014e-01, PNorm = 44.9544, GNorm = 1.0150, lr_0 = 9.2289e-04
Loss = 1.6349e-01, PNorm = 44.9794, GNorm = 1.6617, lr_0 = 9.2226e-04
Loss = 1.7185e-01, PNorm = 44.9928, GNorm = 1.7173, lr_0 = 9.2162e-04
Loss = 1.7383e-01, PNorm = 45.0078, GNorm = 1.3058, lr_0 = 9.2099e-04
Validation mae = 0.462282
Epoch 3
Loss = 1.6036e-01, PNorm = 45.0154, GNorm = 0.9220, lr_0 = 9.2036e-04
Loss = 1.9185e-01, PNorm = 45.0375, GNorm = 2.4637, lr_0 = 9.1973e-04
Loss = 1.5992e-01, PNorm = 45.0633, GNorm = 0.8991, lr_0 = 9.1910e-04
Loss = 1.5843e-01, PNorm = 45.0821, GNorm = 2.1599, lr_0 = 9.1847e-04
Loss = 1.7803e-01, PNorm = 45.0937, GNorm = 2.1989, lr_0 = 9.1784e-04
Loss = 1.8212e-01, PNorm = 45.1177, GNorm = 6.8657, lr_0 = 9.1721e-04
Loss = 1.9296e-01, PNorm = 45.1463, GNorm = 2.3305, lr_0 = 9.1658e-04
Loss = 2.0523e-01, PNorm = 45.1766, GNorm = 1.8845, lr_0 = 9.1596e-04
Loss = 1.6963e-01, PNorm = 45.2210, GNorm = 1.7240, lr_0 = 9.1533e-04
Loss = 1.6313e-01, PNorm = 45.2453, GNorm = 1.3099, lr_0 = 9.1470e-04
Loss = 1.7563e-01, PNorm = 45.2712, GNorm = 0.8100, lr_0 = 9.1408e-04
Loss = 1.7047e-01, PNorm = 45.2874, GNorm = 0.6065, lr_0 = 9.1345e-04
Loss = 1.6116e-01, PNorm = 45.3051, GNorm = 1.4789, lr_0 = 9.1282e-04
Loss = 1.9179e-01, PNorm = 45.3193, GNorm = 0.9847, lr_0 = 9.1220e-04
Loss = 1.5481e-01, PNorm = 45.3490, GNorm = 1.7715, lr_0 = 9.1157e-04
Loss = 1.6855e-01, PNorm = 45.3740, GNorm = 2.4284, lr_0 = 9.1095e-04
Loss = 1.6729e-01, PNorm = 45.3828, GNorm = 1.1972, lr_0 = 9.1032e-04
Loss = 1.7323e-01, PNorm = 45.3990, GNorm = 0.7533, lr_0 = 9.0970e-04
Loss = 1.6162e-01, PNorm = 45.4116, GNorm = 1.7365, lr_0 = 9.0908e-04
Loss = 1.7077e-01, PNorm = 45.4280, GNorm = 0.8137, lr_0 = 9.0846e-04
Loss = 1.5487e-01, PNorm = 45.4532, GNorm = 0.7648, lr_0 = 9.0783e-04
Loss = 1.6421e-01, PNorm = 45.4849, GNorm = 1.2540, lr_0 = 9.0721e-04
Loss = 1.9184e-01, PNorm = 45.5055, GNorm = 2.2347, lr_0 = 9.0659e-04
Loss = 2.0627e-01, PNorm = 45.5325, GNorm = 0.7486, lr_0 = 9.0597e-04
Loss = 1.6871e-01, PNorm = 45.5643, GNorm = 0.8223, lr_0 = 9.0535e-04
Loss = 1.5277e-01, PNorm = 45.5956, GNorm = 0.9176, lr_0 = 9.0473e-04
Loss = 1.7179e-01, PNorm = 45.6211, GNorm = 0.9785, lr_0 = 9.0411e-04
Loss = 1.9553e-01, PNorm = 45.6440, GNorm = 2.8008, lr_0 = 9.0349e-04
Loss = 2.1656e-01, PNorm = 45.6709, GNorm = 3.4100, lr_0 = 9.0287e-04
Loss = 1.9583e-01, PNorm = 45.6958, GNorm = 0.9242, lr_0 = 9.0225e-04
Loss = 2.1641e-01, PNorm = 45.7247, GNorm = 3.5298, lr_0 = 9.0163e-04
Loss = 1.5973e-01, PNorm = 45.7531, GNorm = 0.9113, lr_0 = 9.0102e-04
Loss = 1.8380e-01, PNorm = 45.7858, GNorm = 1.3001, lr_0 = 9.0040e-04
Loss = 1.7779e-01, PNorm = 45.8074, GNorm = 0.7831, lr_0 = 8.9978e-04
Loss = 1.7269e-01, PNorm = 45.8333, GNorm = 0.7855, lr_0 = 8.9916e-04
Loss = 1.5923e-01, PNorm = 45.8586, GNorm = 2.0108, lr_0 = 8.9855e-04
Loss = 1.7801e-01, PNorm = 45.8846, GNorm = 1.5704, lr_0 = 8.9793e-04
Loss = 1.7555e-01, PNorm = 45.8987, GNorm = 2.8970, lr_0 = 8.9732e-04
Loss = 2.0134e-01, PNorm = 45.9206, GNorm = 3.3336, lr_0 = 8.9670e-04
Loss = 1.8783e-01, PNorm = 45.9470, GNorm = 1.1697, lr_0 = 8.9609e-04
Loss = 1.9502e-01, PNorm = 45.9735, GNorm = 2.0791, lr_0 = 8.9548e-04
Loss = 1.9705e-01, PNorm = 45.9923, GNorm = 2.5328, lr_0 = 8.9486e-04
Loss = 1.6999e-01, PNorm = 46.0178, GNorm = 1.8876, lr_0 = 8.9425e-04
Loss = 1.5847e-01, PNorm = 46.0449, GNorm = 1.2892, lr_0 = 8.9364e-04
Loss = 1.5760e-01, PNorm = 46.0640, GNorm = 2.0443, lr_0 = 8.9302e-04
Loss = 1.7632e-01, PNorm = 46.0848, GNorm = 2.2290, lr_0 = 8.9241e-04
Loss = 2.0306e-01, PNorm = 46.1021, GNorm = 1.7463, lr_0 = 8.9180e-04
Loss = 1.6606e-01, PNorm = 46.1312, GNorm = 0.6560, lr_0 = 8.9119e-04
Loss = 1.5085e-01, PNorm = 46.1561, GNorm = 0.7799, lr_0 = 8.9058e-04
Loss = 1.8073e-01, PNorm = 46.1794, GNorm = 0.9563, lr_0 = 8.8997e-04
Loss = 1.4832e-01, PNorm = 46.1983, GNorm = 0.8488, lr_0 = 8.8936e-04
Loss = 1.4643e-01, PNorm = 46.2194, GNorm = 1.7376, lr_0 = 8.8875e-04
Loss = 1.6338e-01, PNorm = 46.2442, GNorm = 1.5156, lr_0 = 8.8814e-04
Loss = 1.5443e-01, PNorm = 46.2636, GNorm = 0.8669, lr_0 = 8.8753e-04
Loss = 2.0670e-01, PNorm = 46.2844, GNorm = 3.2104, lr_0 = 8.8693e-04
Loss = 1.7699e-01, PNorm = 46.3082, GNorm = 1.4297, lr_0 = 8.8632e-04
Loss = 1.6253e-01, PNorm = 46.3360, GNorm = 0.6438, lr_0 = 8.8571e-04
Loss = 1.6182e-01, PNorm = 46.3610, GNorm = 1.5973, lr_0 = 8.8510e-04
Loss = 1.5375e-01, PNorm = 46.3875, GNorm = 1.1237, lr_0 = 8.8450e-04
Loss = 1.7045e-01, PNorm = 46.4151, GNorm = 1.3031, lr_0 = 8.8389e-04
Loss = 1.6232e-01, PNorm = 46.4406, GNorm = 1.5299, lr_0 = 8.8329e-04
Loss = 1.5397e-01, PNorm = 46.4567, GNorm = 0.5676, lr_0 = 8.8268e-04
Loss = 1.4971e-01, PNorm = 46.4685, GNorm = 1.3530, lr_0 = 8.8208e-04
Loss = 1.6837e-01, PNorm = 46.4885, GNorm = 1.2848, lr_0 = 8.8147e-04
Loss = 1.8156e-01, PNorm = 46.5041, GNorm = 0.9076, lr_0 = 8.8087e-04
Loss = 1.7693e-01, PNorm = 46.5376, GNorm = 1.9767, lr_0 = 8.8026e-04
Loss = 1.6223e-01, PNorm = 46.5615, GNorm = 0.7264, lr_0 = 8.7966e-04
Loss = 1.9068e-01, PNorm = 46.5826, GNorm = 1.9526, lr_0 = 8.7906e-04
Loss = 1.8564e-01, PNorm = 46.6157, GNorm = 1.8872, lr_0 = 8.7846e-04
Loss = 2.1923e-01, PNorm = 46.6510, GNorm = 3.4517, lr_0 = 8.7785e-04
Loss = 1.6838e-01, PNorm = 46.6746, GNorm = 0.6650, lr_0 = 8.7725e-04
Loss = 1.9648e-01, PNorm = 46.7031, GNorm = 1.0614, lr_0 = 8.7665e-04
Loss = 1.8083e-01, PNorm = 46.7244, GNorm = 1.2784, lr_0 = 8.7605e-04
Loss = 1.6180e-01, PNorm = 46.7387, GNorm = 0.7299, lr_0 = 8.7545e-04
Loss = 1.6878e-01, PNorm = 46.7697, GNorm = 1.2557, lr_0 = 8.7485e-04
Loss = 1.6916e-01, PNorm = 46.7860, GNorm = 1.1696, lr_0 = 8.7425e-04
Loss = 1.5208e-01, PNorm = 46.8000, GNorm = 1.5011, lr_0 = 8.7365e-04
Loss = 1.6681e-01, PNorm = 46.8225, GNorm = 1.1340, lr_0 = 8.7306e-04
Loss = 1.6808e-01, PNorm = 46.8447, GNorm = 2.0283, lr_0 = 8.7246e-04
Loss = 2.0568e-01, PNorm = 46.8740, GNorm = 1.9213, lr_0 = 8.7186e-04
Loss = 1.8805e-01, PNorm = 46.8952, GNorm = 1.5059, lr_0 = 8.7126e-04
Loss = 1.7846e-01, PNorm = 46.9179, GNorm = 2.0571, lr_0 = 8.7067e-04
Loss = 1.6145e-01, PNorm = 46.9407, GNorm = 0.8536, lr_0 = 8.7007e-04
Loss = 1.7178e-01, PNorm = 46.9685, GNorm = 1.2835, lr_0 = 8.6947e-04
Loss = 1.5924e-01, PNorm = 46.9852, GNorm = 1.3983, lr_0 = 8.6888e-04
Loss = 1.7184e-01, PNorm = 47.0031, GNorm = 0.9984, lr_0 = 8.6828e-04
Loss = 1.6550e-01, PNorm = 47.0193, GNorm = 1.4573, lr_0 = 8.6769e-04
Loss = 1.7204e-01, PNorm = 47.0350, GNorm = 1.0434, lr_0 = 8.6709e-04
Loss = 1.5363e-01, PNorm = 47.0526, GNorm = 1.2040, lr_0 = 8.6650e-04
Loss = 1.7242e-01, PNorm = 47.0679, GNorm = 1.2832, lr_0 = 8.6590e-04
Loss = 1.6474e-01, PNorm = 47.0919, GNorm = 1.2863, lr_0 = 8.6531e-04
Loss = 1.5148e-01, PNorm = 47.1102, GNorm = 1.5500, lr_0 = 8.6472e-04
Loss = 1.4795e-01, PNorm = 47.1268, GNorm = 1.3014, lr_0 = 8.6413e-04
Loss = 1.5927e-01, PNorm = 47.1471, GNorm = 2.1145, lr_0 = 8.6353e-04
Loss = 1.6640e-01, PNorm = 47.1661, GNorm = 1.3161, lr_0 = 8.6294e-04
Loss = 1.5161e-01, PNorm = 47.1888, GNorm = 1.7014, lr_0 = 8.6235e-04
Loss = 1.6881e-01, PNorm = 47.1993, GNorm = 3.5970, lr_0 = 8.6176e-04
Loss = 1.7372e-01, PNorm = 47.2153, GNorm = 0.6572, lr_0 = 8.6117e-04
Loss = 1.9197e-01, PNorm = 47.2459, GNorm = 2.3830, lr_0 = 8.6058e-04
Loss = 1.6940e-01, PNorm = 47.2763, GNorm = 2.5877, lr_0 = 8.5999e-04
Loss = 1.5722e-01, PNorm = 47.2963, GNorm = 1.2580, lr_0 = 8.5940e-04
Loss = 1.8029e-01, PNorm = 47.3182, GNorm = 0.8081, lr_0 = 8.5881e-04
Loss = 1.6737e-01, PNorm = 47.3469, GNorm = 2.0088, lr_0 = 8.5823e-04
Loss = 1.8913e-01, PNorm = 47.3664, GNorm = 1.8393, lr_0 = 8.5764e-04
Loss = 1.8702e-01, PNorm = 47.3955, GNorm = 0.6697, lr_0 = 8.5705e-04
Loss = 1.4012e-01, PNorm = 47.4216, GNorm = 0.4909, lr_0 = 8.5646e-04
Loss = 1.6213e-01, PNorm = 47.4338, GNorm = 1.4362, lr_0 = 8.5588e-04
Loss = 1.7448e-01, PNorm = 47.4584, GNorm = 0.9292, lr_0 = 8.5529e-04
Loss = 1.7727e-01, PNorm = 47.4818, GNorm = 0.9088, lr_0 = 8.5470e-04
Loss = 1.6674e-01, PNorm = 47.5042, GNorm = 1.2867, lr_0 = 8.5412e-04
Loss = 1.9766e-01, PNorm = 47.5236, GNorm = 1.0316, lr_0 = 8.5353e-04
Loss = 1.6353e-01, PNorm = 47.5397, GNorm = 1.5231, lr_0 = 8.5295e-04
Loss = 1.4964e-01, PNorm = 47.5523, GNorm = 2.4275, lr_0 = 8.5236e-04
Loss = 1.5834e-01, PNorm = 47.5690, GNorm = 1.4145, lr_0 = 8.5178e-04
Loss = 1.6359e-01, PNorm = 47.5870, GNorm = 1.9462, lr_0 = 8.5120e-04
Loss = 1.4772e-01, PNorm = 47.6123, GNorm = 0.4846, lr_0 = 8.5061e-04
Loss = 1.4908e-01, PNorm = 47.6364, GNorm = 0.7297, lr_0 = 8.5003e-04
Loss = 1.6367e-01, PNorm = 47.6569, GNorm = 0.8979, lr_0 = 8.4945e-04
Loss = 1.4631e-01, PNorm = 47.6697, GNorm = 1.0025, lr_0 = 8.4887e-04
Loss = 1.5378e-01, PNorm = 47.6877, GNorm = 0.7810, lr_0 = 8.4828e-04
Validation mae = 0.457216
Epoch 4
Loss = 1.3561e-01, PNorm = 47.7009, GNorm = 1.1961, lr_0 = 8.4770e-04
Loss = 1.5971e-01, PNorm = 47.7140, GNorm = 0.7875, lr_0 = 8.4712e-04
Loss = 1.5185e-01, PNorm = 47.7351, GNorm = 2.1643, lr_0 = 8.4654e-04
Loss = 1.9652e-01, PNorm = 47.7565, GNorm = 0.6829, lr_0 = 8.4596e-04
Loss = 1.8785e-01, PNorm = 47.7828, GNorm = 4.6065, lr_0 = 8.4538e-04
Loss = 1.6380e-01, PNorm = 47.8089, GNorm = 1.6482, lr_0 = 8.4480e-04
Loss = 1.7349e-01, PNorm = 47.8415, GNorm = 1.4186, lr_0 = 8.4423e-04
Loss = 1.7761e-01, PNorm = 47.8622, GNorm = 0.7481, lr_0 = 8.4365e-04
Loss = 1.4973e-01, PNorm = 47.8840, GNorm = 0.5080, lr_0 = 8.4307e-04
Loss = 1.5091e-01, PNorm = 47.9124, GNorm = 1.7095, lr_0 = 8.4249e-04
Loss = 1.4184e-01, PNorm = 47.9379, GNorm = 0.8331, lr_0 = 8.4191e-04
Loss = 1.6282e-01, PNorm = 47.9537, GNorm = 2.1663, lr_0 = 8.4134e-04
Loss = 2.0221e-01, PNorm = 47.9628, GNorm = 1.5479, lr_0 = 8.4076e-04
Loss = 1.5493e-01, PNorm = 47.9900, GNorm = 2.5276, lr_0 = 8.4019e-04
Loss = 1.7729e-01, PNorm = 48.0211, GNorm = 1.0467, lr_0 = 8.3961e-04
Loss = 1.6981e-01, PNorm = 48.0529, GNorm = 0.6076, lr_0 = 8.3903e-04
Loss = 1.4031e-01, PNorm = 48.0829, GNorm = 1.2993, lr_0 = 8.3846e-04
Loss = 2.0215e-01, PNorm = 48.0991, GNorm = 5.0241, lr_0 = 8.3789e-04
Loss = 1.6629e-01, PNorm = 48.1300, GNorm = 0.9248, lr_0 = 8.3731e-04
Loss = 1.6322e-01, PNorm = 48.1555, GNorm = 0.6159, lr_0 = 8.3674e-04
Loss = 1.5400e-01, PNorm = 48.1761, GNorm = 2.5655, lr_0 = 8.3616e-04
Loss = 1.5384e-01, PNorm = 48.2038, GNorm = 1.7070, lr_0 = 8.3559e-04
Loss = 1.6175e-01, PNorm = 48.2345, GNorm = 1.1329, lr_0 = 8.3502e-04
Loss = 1.5137e-01, PNorm = 48.2590, GNorm = 1.2272, lr_0 = 8.3445e-04
Loss = 1.5522e-01, PNorm = 48.2792, GNorm = 0.8810, lr_0 = 8.3388e-04
Loss = 1.3700e-01, PNorm = 48.2983, GNorm = 0.8833, lr_0 = 8.3330e-04
Loss = 1.3901e-01, PNorm = 48.3166, GNorm = 0.9790, lr_0 = 8.3273e-04
Loss = 1.4127e-01, PNorm = 48.3313, GNorm = 1.1774, lr_0 = 8.3216e-04
Loss = 1.3380e-01, PNorm = 48.3450, GNorm = 0.6676, lr_0 = 8.3159e-04
Loss = 1.4251e-01, PNorm = 48.3574, GNorm = 0.9088, lr_0 = 8.3102e-04
Loss = 1.5263e-01, PNorm = 48.3767, GNorm = 0.7007, lr_0 = 8.3045e-04
Loss = 1.5468e-01, PNorm = 48.3984, GNorm = 2.1296, lr_0 = 8.2988e-04
Loss = 1.5306e-01, PNorm = 48.4180, GNorm = 0.9392, lr_0 = 8.2932e-04
Loss = 1.4294e-01, PNorm = 48.4361, GNorm = 0.7738, lr_0 = 8.2875e-04
Loss = 1.5994e-01, PNorm = 48.4600, GNorm = 2.7363, lr_0 = 8.2818e-04
Loss = 1.7570e-01, PNorm = 48.4831, GNorm = 1.3929, lr_0 = 8.2761e-04
Loss = 1.5311e-01, PNorm = 48.5110, GNorm = 1.2532, lr_0 = 8.2705e-04
Loss = 1.2996e-01, PNorm = 48.5338, GNorm = 1.0616, lr_0 = 8.2648e-04
Loss = 1.4898e-01, PNorm = 48.5572, GNorm = 2.2413, lr_0 = 8.2591e-04
Loss = 1.3207e-01, PNorm = 48.5699, GNorm = 1.4034, lr_0 = 8.2535e-04
Loss = 1.4790e-01, PNorm = 48.5906, GNorm = 0.9704, lr_0 = 8.2478e-04
Loss = 1.4596e-01, PNorm = 48.6068, GNorm = 0.8469, lr_0 = 8.2422e-04
Loss = 1.3896e-01, PNorm = 48.6232, GNorm = 1.9641, lr_0 = 8.2365e-04
Loss = 1.5199e-01, PNorm = 48.6437, GNorm = 2.3258, lr_0 = 8.2309e-04
Loss = 1.6287e-01, PNorm = 48.6673, GNorm = 2.8300, lr_0 = 8.2252e-04
Loss = 1.5659e-01, PNorm = 48.6887, GNorm = 1.1628, lr_0 = 8.2196e-04
Loss = 1.6196e-01, PNorm = 48.7074, GNorm = 1.0059, lr_0 = 8.2140e-04
Loss = 1.7324e-01, PNorm = 48.7282, GNorm = 1.5192, lr_0 = 8.2084e-04
Loss = 1.5546e-01, PNorm = 48.7513, GNorm = 3.5750, lr_0 = 8.2027e-04
Loss = 1.6825e-01, PNorm = 48.7761, GNorm = 2.4267, lr_0 = 8.1971e-04
Loss = 1.6414e-01, PNorm = 48.7967, GNorm = 0.7693, lr_0 = 8.1915e-04
Loss = 1.3745e-01, PNorm = 48.8154, GNorm = 1.8299, lr_0 = 8.1859e-04
Loss = 1.5708e-01, PNorm = 48.8304, GNorm = 0.6943, lr_0 = 8.1803e-04
Loss = 1.5280e-01, PNorm = 48.8491, GNorm = 1.6948, lr_0 = 8.1747e-04
Loss = 1.4472e-01, PNorm = 48.8691, GNorm = 2.0769, lr_0 = 8.1691e-04
Loss = 1.4942e-01, PNorm = 48.8885, GNorm = 2.1074, lr_0 = 8.1635e-04
Loss = 1.4587e-01, PNorm = 48.9082, GNorm = 0.8709, lr_0 = 8.1579e-04
Loss = 1.3888e-01, PNorm = 48.9264, GNorm = 1.2937, lr_0 = 8.1523e-04
Loss = 1.2468e-01, PNorm = 48.9481, GNorm = 1.0113, lr_0 = 8.1467e-04
Loss = 1.3167e-01, PNorm = 48.9646, GNorm = 1.5512, lr_0 = 8.1411e-04
Loss = 1.3218e-01, PNorm = 48.9811, GNorm = 1.0401, lr_0 = 8.1355e-04
Loss = 1.4697e-01, PNorm = 49.0021, GNorm = 1.8265, lr_0 = 8.1300e-04
Loss = 1.4139e-01, PNorm = 49.0262, GNorm = 0.9138, lr_0 = 8.1244e-04
Loss = 1.7892e-01, PNorm = 49.0469, GNorm = 0.8961, lr_0 = 8.1188e-04
Loss = 1.5057e-01, PNorm = 49.0698, GNorm = 1.1337, lr_0 = 8.1133e-04
Loss = 1.3316e-01, PNorm = 49.0872, GNorm = 1.5481, lr_0 = 8.1077e-04
Loss = 1.3681e-01, PNorm = 49.0969, GNorm = 0.6665, lr_0 = 8.1022e-04
Loss = 1.4488e-01, PNorm = 49.1158, GNorm = 1.2949, lr_0 = 8.0966e-04
Loss = 1.4955e-01, PNorm = 49.1332, GNorm = 2.1266, lr_0 = 8.0911e-04
Loss = 1.5050e-01, PNorm = 49.1528, GNorm = 1.0086, lr_0 = 8.0855e-04
Loss = 1.3947e-01, PNorm = 49.1692, GNorm = 1.0033, lr_0 = 8.0800e-04
Loss = 1.4409e-01, PNorm = 49.1775, GNorm = 2.1972, lr_0 = 8.0745e-04
Loss = 1.4852e-01, PNorm = 49.1984, GNorm = 1.5173, lr_0 = 8.0689e-04
Loss = 1.4189e-01, PNorm = 49.2237, GNorm = 0.5935, lr_0 = 8.0634e-04
Loss = 1.2969e-01, PNorm = 49.2472, GNorm = 0.7526, lr_0 = 8.0579e-04
Loss = 1.4685e-01, PNorm = 49.2636, GNorm = 1.7529, lr_0 = 8.0523e-04
Loss = 1.6326e-01, PNorm = 49.2904, GNorm = 2.9989, lr_0 = 8.0468e-04
Loss = 1.5187e-01, PNorm = 49.3167, GNorm = 0.7502, lr_0 = 8.0413e-04
Loss = 1.5285e-01, PNorm = 49.3500, GNorm = 1.0474, lr_0 = 8.0358e-04
Loss = 1.4116e-01, PNorm = 49.3757, GNorm = 0.7217, lr_0 = 8.0303e-04
Loss = 1.4204e-01, PNorm = 49.3993, GNorm = 2.9374, lr_0 = 8.0248e-04
Loss = 1.3532e-01, PNorm = 49.4184, GNorm = 0.7077, lr_0 = 8.0193e-04
Loss = 1.5420e-01, PNorm = 49.4326, GNorm = 0.6630, lr_0 = 8.0138e-04
Loss = 1.6456e-01, PNorm = 49.4494, GNorm = 0.7347, lr_0 = 8.0083e-04
Loss = 1.4279e-01, PNorm = 49.4719, GNorm = 1.2030, lr_0 = 8.0028e-04
Loss = 1.4931e-01, PNorm = 49.4919, GNorm = 2.6870, lr_0 = 7.9974e-04
Loss = 1.6425e-01, PNorm = 49.5128, GNorm = 1.9185, lr_0 = 7.9919e-04
Loss = 1.5675e-01, PNorm = 49.5270, GNorm = 1.0103, lr_0 = 7.9864e-04
Loss = 1.4527e-01, PNorm = 49.5511, GNorm = 1.9633, lr_0 = 7.9809e-04
Loss = 1.3186e-01, PNorm = 49.5751, GNorm = 0.7845, lr_0 = 7.9755e-04
Loss = 1.6270e-01, PNorm = 49.5877, GNorm = 1.0084, lr_0 = 7.9700e-04
Loss = 1.7255e-01, PNorm = 49.6087, GNorm = 0.6893, lr_0 = 7.9645e-04
Loss = 1.3390e-01, PNorm = 49.6257, GNorm = 0.6379, lr_0 = 7.9591e-04
Loss = 1.3394e-01, PNorm = 49.6424, GNorm = 0.6545, lr_0 = 7.9536e-04
Loss = 1.6615e-01, PNorm = 49.6616, GNorm = 0.7570, lr_0 = 7.9482e-04
Loss = 1.7938e-01, PNorm = 49.6896, GNorm = 1.1850, lr_0 = 7.9427e-04
Loss = 1.7253e-01, PNorm = 49.7221, GNorm = 1.0434, lr_0 = 7.9373e-04
Loss = 1.3266e-01, PNorm = 49.7411, GNorm = 1.8895, lr_0 = 7.9319e-04
Loss = 1.5408e-01, PNorm = 49.7648, GNorm = 1.4208, lr_0 = 7.9264e-04
Loss = 1.4016e-01, PNorm = 49.7843, GNorm = 1.1685, lr_0 = 7.9210e-04
Loss = 1.9162e-01, PNorm = 49.7995, GNorm = 0.7928, lr_0 = 7.9156e-04
Loss = 1.6139e-01, PNorm = 49.8216, GNorm = 1.9713, lr_0 = 7.9101e-04
Loss = 1.5212e-01, PNorm = 49.8447, GNorm = 0.8756, lr_0 = 7.9047e-04
Loss = 1.3613e-01, PNorm = 49.8680, GNorm = 1.4481, lr_0 = 7.8993e-04
Loss = 1.6797e-01, PNorm = 49.8866, GNorm = 2.4496, lr_0 = 7.8939e-04
Loss = 1.2454e-01, PNorm = 49.9081, GNorm = 0.6781, lr_0 = 7.8885e-04
Loss = 1.6506e-01, PNorm = 49.9203, GNorm = 0.9789, lr_0 = 7.8831e-04
Loss = 1.5006e-01, PNorm = 49.9319, GNorm = 1.0090, lr_0 = 7.8777e-04
Loss = 1.4579e-01, PNorm = 49.9471, GNorm = 0.8860, lr_0 = 7.8723e-04
Loss = 1.4237e-01, PNorm = 49.9601, GNorm = 0.8705, lr_0 = 7.8669e-04
Loss = 1.4185e-01, PNorm = 49.9788, GNorm = 2.3917, lr_0 = 7.8615e-04
Loss = 1.5915e-01, PNorm = 50.0043, GNorm = 0.9206, lr_0 = 7.8561e-04
Loss = 1.6071e-01, PNorm = 50.0335, GNorm = 0.7097, lr_0 = 7.8507e-04
Loss = 1.4639e-01, PNorm = 50.0629, GNorm = 0.9129, lr_0 = 7.8454e-04
Loss = 1.6363e-01, PNorm = 50.0821, GNorm = 0.8857, lr_0 = 7.8400e-04
Loss = 1.5073e-01, PNorm = 50.0999, GNorm = 1.4738, lr_0 = 7.8346e-04
Loss = 1.3956e-01, PNorm = 50.1230, GNorm = 0.7409, lr_0 = 7.8293e-04
Loss = 1.4401e-01, PNorm = 50.1399, GNorm = 0.7056, lr_0 = 7.8239e-04
Loss = 1.5171e-01, PNorm = 50.1580, GNorm = 2.1533, lr_0 = 7.8185e-04
Loss = 1.5936e-01, PNorm = 50.1794, GNorm = 3.7344, lr_0 = 7.8132e-04
Validation mae = 0.473482
Epoch 5
Loss = 1.5572e-01, PNorm = 50.1982, GNorm = 0.9700, lr_0 = 7.8078e-04
Loss = 1.4788e-01, PNorm = 50.2220, GNorm = 1.0301, lr_0 = 7.8025e-04
Loss = 1.5948e-01, PNorm = 50.2460, GNorm = 1.9699, lr_0 = 7.7971e-04
Loss = 1.5386e-01, PNorm = 50.2704, GNorm = 0.6554, lr_0 = 7.7918e-04
Loss = 1.3165e-01, PNorm = 50.2934, GNorm = 1.4680, lr_0 = 7.7864e-04
Loss = 1.5773e-01, PNorm = 50.3121, GNorm = 1.1861, lr_0 = 7.7811e-04
Loss = 1.2608e-01, PNorm = 50.3426, GNorm = 0.7349, lr_0 = 7.7758e-04
Loss = 1.2305e-01, PNorm = 50.3697, GNorm = 1.4465, lr_0 = 7.7705e-04
Loss = 1.3980e-01, PNorm = 50.3956, GNorm = 0.6970, lr_0 = 7.7651e-04
Loss = 1.2400e-01, PNorm = 50.4076, GNorm = 0.8920, lr_0 = 7.7598e-04
Loss = 1.5045e-01, PNorm = 50.4274, GNorm = 0.9902, lr_0 = 7.7545e-04
Loss = 1.3099e-01, PNorm = 50.4425, GNorm = 0.7393, lr_0 = 7.7492e-04
Loss = 1.1249e-01, PNorm = 50.4550, GNorm = 1.9912, lr_0 = 7.7439e-04
Loss = 1.4514e-01, PNorm = 50.4642, GNorm = 1.2826, lr_0 = 7.7386e-04
Loss = 1.5450e-01, PNorm = 50.4901, GNorm = 1.0389, lr_0 = 7.7333e-04
Loss = 1.4337e-01, PNorm = 50.5103, GNorm = 1.8201, lr_0 = 7.7280e-04
Loss = 1.7155e-01, PNorm = 50.5304, GNorm = 0.8227, lr_0 = 7.7227e-04
Loss = 1.6919e-01, PNorm = 50.5554, GNorm = 1.7726, lr_0 = 7.7174e-04
Loss = 1.3035e-01, PNorm = 50.5833, GNorm = 1.0549, lr_0 = 7.7121e-04
Loss = 1.4397e-01, PNorm = 50.6031, GNorm = 1.1690, lr_0 = 7.7068e-04
Loss = 1.3651e-01, PNorm = 50.6252, GNorm = 0.7982, lr_0 = 7.7015e-04
Loss = 1.4172e-01, PNorm = 50.6420, GNorm = 0.6662, lr_0 = 7.6963e-04
Loss = 1.2682e-01, PNorm = 50.6596, GNorm = 1.8904, lr_0 = 7.6910e-04
Loss = 1.3029e-01, PNorm = 50.6702, GNorm = 0.9759, lr_0 = 7.6857e-04
Loss = 1.2273e-01, PNorm = 50.6860, GNorm = 1.1617, lr_0 = 7.6805e-04
Loss = 1.1875e-01, PNorm = 50.7004, GNorm = 0.7439, lr_0 = 7.6752e-04
Loss = 1.4518e-01, PNorm = 50.7178, GNorm = 1.3426, lr_0 = 7.6699e-04
Loss = 1.3826e-01, PNorm = 50.7319, GNorm = 0.9312, lr_0 = 7.6647e-04
Loss = 1.3816e-01, PNorm = 50.7573, GNorm = 0.7638, lr_0 = 7.6594e-04
Loss = 1.2283e-01, PNorm = 50.7767, GNorm = 1.9505, lr_0 = 7.6542e-04
Loss = 1.1517e-01, PNorm = 50.7897, GNorm = 1.2302, lr_0 = 7.6489e-04
Loss = 1.2362e-01, PNorm = 50.8004, GNorm = 2.7138, lr_0 = 7.6437e-04
Loss = 1.3413e-01, PNorm = 50.8196, GNorm = 0.6733, lr_0 = 7.6385e-04
Loss = 1.2158e-01, PNorm = 50.8376, GNorm = 0.5787, lr_0 = 7.6332e-04
Loss = 1.4544e-01, PNorm = 50.8552, GNorm = 0.8624, lr_0 = 7.6280e-04
Loss = 1.3576e-01, PNorm = 50.8761, GNorm = 0.8864, lr_0 = 7.6228e-04
Loss = 1.3463e-01, PNorm = 50.9014, GNorm = 0.7593, lr_0 = 7.6176e-04
Loss = 1.3591e-01, PNorm = 50.9315, GNorm = 1.5371, lr_0 = 7.6123e-04
Loss = 1.2779e-01, PNorm = 50.9591, GNorm = 1.1634, lr_0 = 7.6071e-04
Loss = 1.3086e-01, PNorm = 50.9729, GNorm = 1.3138, lr_0 = 7.6019e-04
Loss = 1.4555e-01, PNorm = 50.9795, GNorm = 1.0507, lr_0 = 7.5967e-04
Loss = 1.2364e-01, PNorm = 51.0010, GNorm = 0.6030, lr_0 = 7.5915e-04
Loss = 1.4405e-01, PNorm = 51.0252, GNorm = 0.5744, lr_0 = 7.5863e-04
Loss = 1.4806e-01, PNorm = 51.0399, GNorm = 1.6093, lr_0 = 7.5811e-04
Loss = 1.4371e-01, PNorm = 51.0624, GNorm = 0.5295, lr_0 = 7.5759e-04
Loss = 1.3862e-01, PNorm = 51.0794, GNorm = 0.7030, lr_0 = 7.5707e-04
Loss = 1.3596e-01, PNorm = 51.0944, GNorm = 0.8888, lr_0 = 7.5655e-04
Loss = 1.4426e-01, PNorm = 51.1204, GNorm = 2.5418, lr_0 = 7.5603e-04
Loss = 1.4409e-01, PNorm = 51.1402, GNorm = 1.9477, lr_0 = 7.5552e-04
Loss = 1.6384e-01, PNorm = 51.1667, GNorm = 2.3824, lr_0 = 7.5500e-04
Loss = 1.4881e-01, PNorm = 51.1879, GNorm = 0.7998, lr_0 = 7.5448e-04
Loss = 1.1262e-01, PNorm = 51.2085, GNorm = 0.7189, lr_0 = 7.5397e-04
Loss = 1.4154e-01, PNorm = 51.2269, GNorm = 1.0158, lr_0 = 7.5345e-04
Loss = 1.4552e-01, PNorm = 51.2449, GNorm = 1.3012, lr_0 = 7.5293e-04
Loss = 1.6196e-01, PNorm = 51.2658, GNorm = 0.9219, lr_0 = 7.5242e-04
Loss = 1.2652e-01, PNorm = 51.2840, GNorm = 0.6312, lr_0 = 7.5190e-04
Loss = 1.4977e-01, PNorm = 51.2957, GNorm = 0.7910, lr_0 = 7.5139e-04
Loss = 1.3583e-01, PNorm = 51.2995, GNorm = 1.1969, lr_0 = 7.5087e-04
Loss = 1.2461e-01, PNorm = 51.3109, GNorm = 0.7527, lr_0 = 7.5036e-04
Loss = 1.2794e-01, PNorm = 51.3292, GNorm = 1.4725, lr_0 = 7.4984e-04
Loss = 1.3188e-01, PNorm = 51.3517, GNorm = 1.0792, lr_0 = 7.4933e-04
Loss = 1.4420e-01, PNorm = 51.3704, GNorm = 0.8399, lr_0 = 7.4882e-04
Loss = 1.3870e-01, PNorm = 51.3842, GNorm = 0.8096, lr_0 = 7.4830e-04
Loss = 1.1857e-01, PNorm = 51.4010, GNorm = 0.8646, lr_0 = 7.4779e-04
Loss = 1.2923e-01, PNorm = 51.4128, GNorm = 0.8791, lr_0 = 7.4728e-04
Loss = 1.5597e-01, PNorm = 51.4245, GNorm = 0.9117, lr_0 = 7.4677e-04
Loss = 1.3738e-01, PNorm = 51.4351, GNorm = 1.2628, lr_0 = 7.4625e-04
Loss = 1.4348e-01, PNorm = 51.4569, GNorm = 0.9893, lr_0 = 7.4574e-04
Loss = 1.2985e-01, PNorm = 51.4885, GNorm = 0.7663, lr_0 = 7.4523e-04
Loss = 1.3053e-01, PNorm = 51.5100, GNorm = 0.7941, lr_0 = 7.4472e-04
Loss = 1.5218e-01, PNorm = 51.5281, GNorm = 0.8058, lr_0 = 7.4421e-04
Loss = 1.3170e-01, PNorm = 51.5398, GNorm = 1.2144, lr_0 = 7.4370e-04
Loss = 1.3657e-01, PNorm = 51.5483, GNorm = 0.8056, lr_0 = 7.4319e-04
Loss = 1.3871e-01, PNorm = 51.5525, GNorm = 0.7658, lr_0 = 7.4268e-04
Loss = 1.5722e-01, PNorm = 51.5640, GNorm = 1.9070, lr_0 = 7.4217e-04
Loss = 1.4564e-01, PNorm = 51.5820, GNorm = 1.7623, lr_0 = 7.4167e-04
Loss = 1.4747e-01, PNorm = 51.6048, GNorm = 1.4197, lr_0 = 7.4116e-04
Loss = 1.5748e-01, PNorm = 51.6309, GNorm = 0.8640, lr_0 = 7.4065e-04
Loss = 1.2624e-01, PNorm = 51.6533, GNorm = 2.0097, lr_0 = 7.4014e-04
Loss = 1.1343e-01, PNorm = 51.6727, GNorm = 0.4998, lr_0 = 7.3964e-04
Loss = 1.3107e-01, PNorm = 51.6856, GNorm = 1.1885, lr_0 = 7.3913e-04
Loss = 1.2963e-01, PNorm = 51.7035, GNorm = 1.0794, lr_0 = 7.3862e-04
Loss = 1.3437e-01, PNorm = 51.7239, GNorm = 2.2649, lr_0 = 7.3812e-04
Loss = 1.5122e-01, PNorm = 51.7458, GNorm = 1.1894, lr_0 = 7.3761e-04
Loss = 1.3965e-01, PNorm = 51.7610, GNorm = 2.4175, lr_0 = 7.3711e-04
Loss = 1.5831e-01, PNorm = 51.7785, GNorm = 1.3291, lr_0 = 7.3660e-04
Loss = 1.3171e-01, PNorm = 51.8029, GNorm = 0.9163, lr_0 = 7.3610e-04
Loss = 1.4508e-01, PNorm = 51.8182, GNorm = 0.9835, lr_0 = 7.3559e-04
Loss = 1.3730e-01, PNorm = 51.8380, GNorm = 1.0878, lr_0 = 7.3509e-04
Loss = 1.5344e-01, PNorm = 51.8595, GNorm = 2.3904, lr_0 = 7.3458e-04
Loss = 1.3393e-01, PNorm = 51.8772, GNorm = 0.9795, lr_0 = 7.3408e-04
Loss = 1.3738e-01, PNorm = 51.8971, GNorm = 0.9220, lr_0 = 7.3358e-04
Loss = 1.2852e-01, PNorm = 51.9205, GNorm = 2.2133, lr_0 = 7.3308e-04
Loss = 1.3506e-01, PNorm = 51.9390, GNorm = 0.9302, lr_0 = 7.3257e-04
Loss = 1.5139e-01, PNorm = 51.9499, GNorm = 1.1910, lr_0 = 7.3207e-04
Loss = 1.3763e-01, PNorm = 51.9639, GNorm = 1.2688, lr_0 = 7.3157e-04
Loss = 1.5290e-01, PNorm = 51.9824, GNorm = 0.7709, lr_0 = 7.3107e-04
Loss = 1.6236e-01, PNorm = 52.0029, GNorm = 1.0512, lr_0 = 7.3057e-04
Loss = 1.6657e-01, PNorm = 52.0265, GNorm = 1.9892, lr_0 = 7.3007e-04
Loss = 1.5217e-01, PNorm = 52.0448, GNorm = 1.3882, lr_0 = 7.2957e-04
Loss = 1.2106e-01, PNorm = 52.0553, GNorm = 0.5933, lr_0 = 7.2907e-04
Loss = 1.4562e-01, PNorm = 52.0728, GNorm = 0.6593, lr_0 = 7.2857e-04
Loss = 1.4352e-01, PNorm = 52.0964, GNorm = 1.5924, lr_0 = 7.2807e-04
Loss = 1.4928e-01, PNorm = 52.1156, GNorm = 1.9233, lr_0 = 7.2757e-04
Loss = 1.3385e-01, PNorm = 52.1321, GNorm = 0.7829, lr_0 = 7.2707e-04
Loss = 1.3400e-01, PNorm = 52.1495, GNorm = 0.7441, lr_0 = 7.2657e-04
Loss = 1.6982e-01, PNorm = 52.1672, GNorm = 1.2039, lr_0 = 7.2608e-04
Loss = 1.2788e-01, PNorm = 52.1822, GNorm = 2.9059, lr_0 = 7.2558e-04
Loss = 1.3143e-01, PNorm = 52.1959, GNorm = 0.6817, lr_0 = 7.2508e-04
Loss = 1.5485e-01, PNorm = 52.2126, GNorm = 1.4066, lr_0 = 7.2458e-04
Loss = 1.3858e-01, PNorm = 52.2309, GNorm = 1.1073, lr_0 = 7.2409e-04
Loss = 1.3632e-01, PNorm = 52.2540, GNorm = 0.7421, lr_0 = 7.2359e-04
Loss = 1.2700e-01, PNorm = 52.2618, GNorm = 1.1049, lr_0 = 7.2310e-04
Loss = 1.3782e-01, PNorm = 52.2715, GNorm = 0.6196, lr_0 = 7.2260e-04
Loss = 1.3888e-01, PNorm = 52.2843, GNorm = 0.5381, lr_0 = 7.2211e-04
Loss = 1.3149e-01, PNorm = 52.2973, GNorm = 0.9273, lr_0 = 7.2161e-04
Loss = 1.4605e-01, PNorm = 52.3130, GNorm = 0.8897, lr_0 = 7.2112e-04
Loss = 1.3376e-01, PNorm = 52.3333, GNorm = 0.6365, lr_0 = 7.2062e-04
Loss = 1.3485e-01, PNorm = 52.3517, GNorm = 1.1794, lr_0 = 7.2013e-04
Loss = 1.2893e-01, PNorm = 52.3671, GNorm = 1.1008, lr_0 = 7.1964e-04
Validation mae = 0.427020
Epoch 6
Loss = 1.2101e-01, PNorm = 52.3825, GNorm = 0.6913, lr_0 = 7.1914e-04
Loss = 1.1512e-01, PNorm = 52.3976, GNorm = 1.2767, lr_0 = 7.1865e-04
Loss = 1.1796e-01, PNorm = 52.4087, GNorm = 1.7142, lr_0 = 7.1816e-04
Loss = 1.2202e-01, PNorm = 52.4209, GNorm = 0.9923, lr_0 = 7.1767e-04
Loss = 1.3472e-01, PNorm = 52.4330, GNorm = 0.5888, lr_0 = 7.1717e-04
Loss = 1.4062e-01, PNorm = 52.4466, GNorm = 2.4825, lr_0 = 7.1668e-04
Loss = 1.2119e-01, PNorm = 52.4630, GNorm = 0.6653, lr_0 = 7.1619e-04
Loss = 1.2961e-01, PNorm = 52.4834, GNorm = 0.9005, lr_0 = 7.1570e-04
Loss = 1.2156e-01, PNorm = 52.4978, GNorm = 1.3105, lr_0 = 7.1521e-04
Loss = 1.1933e-01, PNorm = 52.5172, GNorm = 0.9239, lr_0 = 7.1472e-04
Loss = 1.1549e-01, PNorm = 52.5327, GNorm = 0.7953, lr_0 = 7.1423e-04
Loss = 1.2343e-01, PNorm = 52.5494, GNorm = 1.4487, lr_0 = 7.1374e-04
Loss = 1.2715e-01, PNorm = 52.5613, GNorm = 1.9381, lr_0 = 7.1325e-04
Loss = 1.3966e-01, PNorm = 52.5827, GNorm = 1.0954, lr_0 = 7.1277e-04
Loss = 1.3721e-01, PNorm = 52.6048, GNorm = 0.7611, lr_0 = 7.1228e-04
Loss = 1.2971e-01, PNorm = 52.6210, GNorm = 1.8518, lr_0 = 7.1179e-04
Loss = 1.3797e-01, PNorm = 52.6427, GNorm = 1.2625, lr_0 = 7.1130e-04
Loss = 1.2185e-01, PNorm = 52.6582, GNorm = 0.6996, lr_0 = 7.1081e-04
Loss = 1.1482e-01, PNorm = 52.6738, GNorm = 1.1828, lr_0 = 7.1033e-04
Loss = 1.2593e-01, PNorm = 52.6952, GNorm = 0.8020, lr_0 = 7.0984e-04
Loss = 1.5629e-01, PNorm = 52.7093, GNorm = 0.5677, lr_0 = 7.0935e-04
Loss = 1.3777e-01, PNorm = 52.7268, GNorm = 1.6925, lr_0 = 7.0887e-04
Loss = 1.3182e-01, PNorm = 52.7517, GNorm = 1.8964, lr_0 = 7.0838e-04
Loss = 1.2979e-01, PNorm = 52.7783, GNorm = 1.2937, lr_0 = 7.0790e-04
Loss = 1.1083e-01, PNorm = 52.8023, GNorm = 1.6361, lr_0 = 7.0741e-04
Loss = 9.9185e-02, PNorm = 52.8181, GNorm = 0.4546, lr_0 = 7.0693e-04
Loss = 1.2741e-01, PNorm = 52.8333, GNorm = 0.8681, lr_0 = 7.0644e-04
Loss = 1.2979e-01, PNorm = 52.8497, GNorm = 0.6889, lr_0 = 7.0596e-04
Loss = 1.1467e-01, PNorm = 52.8627, GNorm = 1.5070, lr_0 = 7.0548e-04
Loss = 1.1885e-01, PNorm = 52.8817, GNorm = 0.4946, lr_0 = 7.0499e-04
Loss = 1.2290e-01, PNorm = 52.8964, GNorm = 0.7098, lr_0 = 7.0451e-04
Loss = 1.3877e-01, PNorm = 52.9143, GNorm = 1.3119, lr_0 = 7.0403e-04
Loss = 1.1622e-01, PNorm = 52.9276, GNorm = 0.6491, lr_0 = 7.0354e-04
Loss = 1.4985e-01, PNorm = 52.9424, GNorm = 1.5224, lr_0 = 7.0306e-04
Loss = 1.6016e-01, PNorm = 52.9659, GNorm = 0.5334, lr_0 = 7.0258e-04
Loss = 1.5138e-01, PNorm = 52.9938, GNorm = 1.5697, lr_0 = 7.0210e-04
Loss = 1.2378e-01, PNorm = 53.0137, GNorm = 2.2347, lr_0 = 7.0162e-04
Loss = 1.2303e-01, PNorm = 53.0349, GNorm = 1.0875, lr_0 = 7.0114e-04
Loss = 1.1430e-01, PNorm = 53.0560, GNorm = 0.7820, lr_0 = 7.0066e-04
Loss = 1.3775e-01, PNorm = 53.0738, GNorm = 1.6037, lr_0 = 7.0018e-04
Loss = 1.2763e-01, PNorm = 53.0911, GNorm = 1.3288, lr_0 = 6.9970e-04
Loss = 1.3139e-01, PNorm = 53.1095, GNorm = 0.7358, lr_0 = 6.9922e-04
Loss = 1.1772e-01, PNorm = 53.1275, GNorm = 0.8390, lr_0 = 6.9874e-04
Loss = 1.3096e-01, PNorm = 53.1433, GNorm = 1.8538, lr_0 = 6.9826e-04
Loss = 1.2688e-01, PNorm = 53.1582, GNorm = 0.9461, lr_0 = 6.9778e-04
Loss = 1.4165e-01, PNorm = 53.1774, GNorm = 0.9728, lr_0 = 6.9730e-04
Loss = 1.4898e-01, PNorm = 53.1875, GNorm = 1.8631, lr_0 = 6.9683e-04
Loss = 1.4629e-01, PNorm = 53.2099, GNorm = 0.7129, lr_0 = 6.9635e-04
Loss = 1.2745e-01, PNorm = 53.2373, GNorm = 0.9779, lr_0 = 6.9587e-04
Loss = 1.3273e-01, PNorm = 53.2642, GNorm = 0.8384, lr_0 = 6.9540e-04
Loss = 1.2707e-01, PNorm = 53.2836, GNorm = 1.7098, lr_0 = 6.9492e-04
Loss = 1.1690e-01, PNorm = 53.2972, GNorm = 0.8248, lr_0 = 6.9444e-04
Loss = 1.2582e-01, PNorm = 53.3154, GNorm = 0.9501, lr_0 = 6.9397e-04
Loss = 1.3188e-01, PNorm = 53.3341, GNorm = 1.4853, lr_0 = 6.9349e-04
Loss = 1.3107e-01, PNorm = 53.3441, GNorm = 0.5369, lr_0 = 6.9302e-04
Loss = 1.0467e-01, PNorm = 53.3566, GNorm = 0.5245, lr_0 = 6.9254e-04
Loss = 1.3054e-01, PNorm = 53.3780, GNorm = 0.6795, lr_0 = 6.9207e-04
Loss = 1.3505e-01, PNorm = 53.4036, GNorm = 0.5595, lr_0 = 6.9159e-04
Loss = 1.1766e-01, PNorm = 53.4236, GNorm = 1.7141, lr_0 = 6.9112e-04
Loss = 1.2991e-01, PNorm = 53.4439, GNorm = 0.6166, lr_0 = 6.9065e-04
Loss = 1.4267e-01, PNorm = 53.4684, GNorm = 0.5919, lr_0 = 6.9017e-04
Loss = 1.2959e-01, PNorm = 53.4874, GNorm = 1.1598, lr_0 = 6.8970e-04
Loss = 1.3036e-01, PNorm = 53.5088, GNorm = 1.8221, lr_0 = 6.8923e-04
Loss = 1.3731e-01, PNorm = 53.5292, GNorm = 0.7069, lr_0 = 6.8876e-04
Loss = 1.1948e-01, PNorm = 53.5442, GNorm = 0.8181, lr_0 = 6.8828e-04
Loss = 1.2349e-01, PNorm = 53.5565, GNorm = 1.5371, lr_0 = 6.8781e-04
Loss = 1.1743e-01, PNorm = 53.5720, GNorm = 0.7877, lr_0 = 6.8734e-04
Loss = 1.3672e-01, PNorm = 53.5913, GNorm = 1.5498, lr_0 = 6.8687e-04
Loss = 1.2464e-01, PNorm = 53.6052, GNorm = 0.6542, lr_0 = 6.8640e-04
Loss = 1.3668e-01, PNorm = 53.6201, GNorm = 1.3157, lr_0 = 6.8593e-04
Loss = 1.3064e-01, PNorm = 53.6370, GNorm = 0.6360, lr_0 = 6.8546e-04
Loss = 1.4108e-01, PNorm = 53.6547, GNorm = 1.3472, lr_0 = 6.8499e-04
Loss = 1.3204e-01, PNorm = 53.6745, GNorm = 1.8090, lr_0 = 6.8452e-04
Loss = 1.1613e-01, PNorm = 53.6904, GNorm = 1.3548, lr_0 = 6.8405e-04
Loss = 1.2906e-01, PNorm = 53.7040, GNorm = 1.3327, lr_0 = 6.8358e-04
Loss = 1.2717e-01, PNorm = 53.7218, GNorm = 1.8561, lr_0 = 6.8312e-04
Loss = 1.4643e-01, PNorm = 53.7485, GNorm = 0.8873, lr_0 = 6.8265e-04
Loss = 1.2577e-01, PNorm = 53.7684, GNorm = 0.6965, lr_0 = 6.8218e-04
Loss = 1.2989e-01, PNorm = 53.7785, GNorm = 0.6928, lr_0 = 6.8171e-04
Loss = 1.2541e-01, PNorm = 53.7957, GNorm = 0.9416, lr_0 = 6.8125e-04
Loss = 1.0612e-01, PNorm = 53.8147, GNorm = 1.0369, lr_0 = 6.8078e-04
Loss = 1.1713e-01, PNorm = 53.8292, GNorm = 0.9942, lr_0 = 6.8031e-04
Loss = 1.2499e-01, PNorm = 53.8445, GNorm = 1.0515, lr_0 = 6.7985e-04
Loss = 1.1028e-01, PNorm = 53.8628, GNorm = 0.9122, lr_0 = 6.7938e-04
Loss = 1.2999e-01, PNorm = 53.8828, GNorm = 1.1407, lr_0 = 6.7892e-04
Loss = 1.3844e-01, PNorm = 53.9023, GNorm = 1.2841, lr_0 = 6.7845e-04
Loss = 1.2317e-01, PNorm = 53.9283, GNorm = 0.7867, lr_0 = 6.7799e-04
Loss = 1.2024e-01, PNorm = 53.9444, GNorm = 0.6116, lr_0 = 6.7752e-04
Loss = 1.2749e-01, PNorm = 53.9532, GNorm = 0.6472, lr_0 = 6.7706e-04
Loss = 1.2920e-01, PNorm = 53.9634, GNorm = 0.9396, lr_0 = 6.7659e-04
Loss = 1.0463e-01, PNorm = 53.9794, GNorm = 1.8410, lr_0 = 6.7613e-04
Loss = 1.1711e-01, PNorm = 53.9959, GNorm = 0.5563, lr_0 = 6.7567e-04
Loss = 1.2979e-01, PNorm = 54.0070, GNorm = 0.7729, lr_0 = 6.7520e-04
Loss = 1.2644e-01, PNorm = 54.0151, GNorm = 0.5760, lr_0 = 6.7474e-04
Loss = 1.2776e-01, PNorm = 54.0332, GNorm = 1.0744, lr_0 = 6.7428e-04
Loss = 1.2118e-01, PNorm = 54.0538, GNorm = 1.0557, lr_0 = 6.7382e-04
Loss = 1.2915e-01, PNorm = 54.0628, GNorm = 0.8261, lr_0 = 6.7335e-04
Loss = 1.5890e-01, PNorm = 54.0806, GNorm = 1.6165, lr_0 = 6.7289e-04
Loss = 1.2056e-01, PNorm = 54.1050, GNorm = 2.3746, lr_0 = 6.7243e-04
Loss = 1.1545e-01, PNorm = 54.1180, GNorm = 0.6824, lr_0 = 6.7197e-04
Loss = 1.3018e-01, PNorm = 54.1423, GNorm = 1.5705, lr_0 = 6.7151e-04
Loss = 1.3148e-01, PNorm = 54.1645, GNorm = 0.9246, lr_0 = 6.7105e-04
Loss = 1.2934e-01, PNorm = 54.1851, GNorm = 0.6384, lr_0 = 6.7059e-04
Loss = 1.4654e-01, PNorm = 54.2023, GNorm = 2.3746, lr_0 = 6.7013e-04
Loss = 1.3672e-01, PNorm = 54.2236, GNorm = 2.8419, lr_0 = 6.6967e-04
Loss = 1.4237e-01, PNorm = 54.2497, GNorm = 1.3258, lr_0 = 6.6921e-04
Loss = 1.4477e-01, PNorm = 54.2773, GNorm = 1.2286, lr_0 = 6.6876e-04
Loss = 1.2640e-01, PNorm = 54.2957, GNorm = 0.7530, lr_0 = 6.6830e-04
Loss = 1.2918e-01, PNorm = 54.3156, GNorm = 1.0577, lr_0 = 6.6784e-04
Loss = 1.3463e-01, PNorm = 54.3361, GNorm = 0.5279, lr_0 = 6.6738e-04
Loss = 1.2481e-01, PNorm = 54.3502, GNorm = 1.0450, lr_0 = 6.6693e-04
Loss = 1.3790e-01, PNorm = 54.3612, GNorm = 1.2742, lr_0 = 6.6647e-04
Loss = 1.3077e-01, PNorm = 54.3714, GNorm = 0.6134, lr_0 = 6.6601e-04
Loss = 1.3080e-01, PNorm = 54.3833, GNorm = 0.6995, lr_0 = 6.6556e-04
Loss = 1.1594e-01, PNorm = 54.3980, GNorm = 0.6929, lr_0 = 6.6510e-04
Loss = 1.2758e-01, PNorm = 54.4181, GNorm = 0.7533, lr_0 = 6.6464e-04
Loss = 1.2520e-01, PNorm = 54.4299, GNorm = 0.6724, lr_0 = 6.6419e-04
Loss = 1.3032e-01, PNorm = 54.4364, GNorm = 0.9971, lr_0 = 6.6373e-04
Loss = 1.3223e-01, PNorm = 54.4583, GNorm = 2.5659, lr_0 = 6.6328e-04
Loss = 1.1233e-01, PNorm = 54.4725, GNorm = 1.0749, lr_0 = 6.6282e-04
Validation mae = 0.429564
Epoch 7
Loss = 1.1206e-01, PNorm = 54.4860, GNorm = 1.4815, lr_0 = 6.6237e-04
Loss = 1.2058e-01, PNorm = 54.5033, GNorm = 1.2980, lr_0 = 6.6192e-04
Loss = 1.1067e-01, PNorm = 54.5241, GNorm = 1.0814, lr_0 = 6.6146e-04
Loss = 1.2854e-01, PNorm = 54.5418, GNorm = 1.6030, lr_0 = 6.6101e-04
Loss = 1.3107e-01, PNorm = 54.5590, GNorm = 0.8414, lr_0 = 6.6056e-04
Loss = 1.3789e-01, PNorm = 54.5809, GNorm = 1.6217, lr_0 = 6.6011e-04
Loss = 1.2092e-01, PNorm = 54.6114, GNorm = 0.6723, lr_0 = 6.5965e-04
Loss = 1.1426e-01, PNorm = 54.6340, GNorm = 0.9669, lr_0 = 6.5920e-04
Loss = 1.0713e-01, PNorm = 54.6524, GNorm = 1.1587, lr_0 = 6.5875e-04
Loss = 1.4906e-01, PNorm = 54.6707, GNorm = 1.7808, lr_0 = 6.5830e-04
Loss = 1.3628e-01, PNorm = 54.6930, GNorm = 1.5432, lr_0 = 6.5785e-04
Loss = 1.3420e-01, PNorm = 54.7154, GNorm = 1.2395, lr_0 = 6.5740e-04
Loss = 1.1833e-01, PNorm = 54.7300, GNorm = 0.9814, lr_0 = 6.5695e-04
Loss = 9.9431e-02, PNorm = 54.7462, GNorm = 0.5929, lr_0 = 6.5650e-04
Loss = 1.0420e-01, PNorm = 54.7629, GNorm = 1.0283, lr_0 = 6.5605e-04
Loss = 1.1448e-01, PNorm = 54.7718, GNorm = 0.7409, lr_0 = 6.5560e-04
Loss = 1.0999e-01, PNorm = 54.7885, GNorm = 0.9259, lr_0 = 6.5515e-04
Loss = 1.3275e-01, PNorm = 54.8081, GNorm = 0.5584, lr_0 = 6.5470e-04
Loss = 1.2687e-01, PNorm = 54.8226, GNorm = 1.8528, lr_0 = 6.5425e-04
Loss = 1.3100e-01, PNorm = 54.8369, GNorm = 0.5762, lr_0 = 6.5380e-04
Loss = 1.0404e-01, PNorm = 54.8499, GNorm = 1.1100, lr_0 = 6.5335e-04
Loss = 1.2356e-01, PNorm = 54.8666, GNorm = 0.9776, lr_0 = 6.5291e-04
Loss = 1.1465e-01, PNorm = 54.8807, GNorm = 0.6274, lr_0 = 6.5246e-04
Loss = 1.0777e-01, PNorm = 54.8994, GNorm = 0.6638, lr_0 = 6.5201e-04
Loss = 1.1171e-01, PNorm = 54.9115, GNorm = 1.2437, lr_0 = 6.5157e-04
Loss = 1.3497e-01, PNorm = 54.9212, GNorm = 0.8283, lr_0 = 6.5112e-04
Loss = 1.1464e-01, PNorm = 54.9376, GNorm = 2.2219, lr_0 = 6.5067e-04
Loss = 1.1924e-01, PNorm = 54.9526, GNorm = 0.7300, lr_0 = 6.5023e-04
Loss = 1.0114e-01, PNorm = 54.9670, GNorm = 0.7088, lr_0 = 6.4978e-04
Loss = 1.1326e-01, PNorm = 54.9827, GNorm = 0.9265, lr_0 = 6.4934e-04
Loss = 1.0538e-01, PNorm = 54.9960, GNorm = 1.4691, lr_0 = 6.4889e-04
Loss = 1.3787e-01, PNorm = 55.0170, GNorm = 1.1537, lr_0 = 6.4845e-04
Loss = 1.1390e-01, PNorm = 55.0357, GNorm = 0.6709, lr_0 = 6.4800e-04
Loss = 1.1084e-01, PNorm = 55.0510, GNorm = 0.5859, lr_0 = 6.4756e-04
Loss = 1.0897e-01, PNorm = 55.0735, GNorm = 1.4900, lr_0 = 6.4712e-04
Loss = 1.4416e-01, PNorm = 55.0908, GNorm = 1.9836, lr_0 = 6.4667e-04
Loss = 1.2845e-01, PNorm = 55.1069, GNorm = 1.3736, lr_0 = 6.4623e-04
Loss = 1.3586e-01, PNorm = 55.1280, GNorm = 1.7139, lr_0 = 6.4579e-04
Loss = 1.1423e-01, PNorm = 55.1528, GNorm = 0.4450, lr_0 = 6.4534e-04
Loss = 1.0153e-01, PNorm = 55.1692, GNorm = 0.9005, lr_0 = 6.4490e-04
Loss = 1.0531e-01, PNorm = 55.1771, GNorm = 0.5702, lr_0 = 6.4446e-04
Loss = 1.2131e-01, PNorm = 55.1857, GNorm = 1.6124, lr_0 = 6.4402e-04
Loss = 1.0905e-01, PNorm = 55.2003, GNorm = 0.7700, lr_0 = 6.4358e-04
Loss = 1.1264e-01, PNorm = 55.2137, GNorm = 1.1142, lr_0 = 6.4314e-04
Loss = 1.3002e-01, PNorm = 55.2260, GNorm = 0.5707, lr_0 = 6.4270e-04
Loss = 1.0013e-01, PNorm = 55.2386, GNorm = 1.4992, lr_0 = 6.4226e-04
Loss = 1.1359e-01, PNorm = 55.2549, GNorm = 0.9235, lr_0 = 6.4182e-04
Loss = 1.2767e-01, PNorm = 55.2733, GNorm = 0.6919, lr_0 = 6.4138e-04
Loss = 1.1459e-01, PNorm = 55.2879, GNorm = 0.8824, lr_0 = 6.4094e-04
Loss = 1.0330e-01, PNorm = 55.2995, GNorm = 0.4713, lr_0 = 6.4050e-04
Loss = 1.2034e-01, PNorm = 55.3155, GNorm = 0.6696, lr_0 = 6.4006e-04
Loss = 1.1146e-01, PNorm = 55.3328, GNorm = 0.8630, lr_0 = 6.3962e-04
Loss = 1.0551e-01, PNorm = 55.3486, GNorm = 0.9273, lr_0 = 6.3918e-04
Loss = 1.1035e-01, PNorm = 55.3640, GNorm = 0.6190, lr_0 = 6.3874e-04
Loss = 1.1099e-01, PNorm = 55.3823, GNorm = 0.6773, lr_0 = 6.3831e-04
Loss = 1.1327e-01, PNorm = 55.3959, GNorm = 0.6645, lr_0 = 6.3787e-04
Loss = 1.1993e-01, PNorm = 55.4091, GNorm = 2.4321, lr_0 = 6.3743e-04
Loss = 1.2576e-01, PNorm = 55.4244, GNorm = 1.2141, lr_0 = 6.3700e-04
Loss = 1.1132e-01, PNorm = 55.4390, GNorm = 0.7794, lr_0 = 6.3656e-04
Loss = 1.0552e-01, PNorm = 55.4553, GNorm = 0.8611, lr_0 = 6.3612e-04
Loss = 1.2066e-01, PNorm = 55.4660, GNorm = 0.7890, lr_0 = 6.3569e-04
Loss = 1.3575e-01, PNorm = 55.4800, GNorm = 1.1806, lr_0 = 6.3525e-04
Loss = 1.1812e-01, PNorm = 55.4994, GNorm = 0.5841, lr_0 = 6.3482e-04
Loss = 1.2648e-01, PNorm = 55.5108, GNorm = 1.4864, lr_0 = 6.3438e-04
Loss = 1.2501e-01, PNorm = 55.5260, GNorm = 1.1512, lr_0 = 6.3395e-04
Loss = 1.2343e-01, PNorm = 55.5387, GNorm = 0.4907, lr_0 = 6.3351e-04
Loss = 1.3495e-01, PNorm = 55.5511, GNorm = 0.8246, lr_0 = 6.3308e-04
Loss = 1.3674e-01, PNorm = 55.5699, GNorm = 1.0135, lr_0 = 6.3265e-04
Loss = 1.1845e-01, PNorm = 55.5870, GNorm = 0.7556, lr_0 = 6.3221e-04
Loss = 1.2588e-01, PNorm = 55.6016, GNorm = 0.7205, lr_0 = 6.3178e-04
Loss = 1.1872e-01, PNorm = 55.6150, GNorm = 1.1723, lr_0 = 6.3135e-04
Loss = 1.3167e-01, PNorm = 55.6329, GNorm = 0.9691, lr_0 = 6.3091e-04
Loss = 1.2299e-01, PNorm = 55.6608, GNorm = 0.5776, lr_0 = 6.3048e-04
Loss = 1.2242e-01, PNorm = 55.6895, GNorm = 0.8425, lr_0 = 6.3005e-04
Loss = 9.8050e-02, PNorm = 55.7077, GNorm = 0.4622, lr_0 = 6.2962e-04
Loss = 1.2225e-01, PNorm = 55.7243, GNorm = 0.7354, lr_0 = 6.2919e-04
Loss = 1.1769e-01, PNorm = 55.7424, GNorm = 1.9533, lr_0 = 6.2876e-04
Loss = 1.2629e-01, PNorm = 55.7664, GNorm = 0.9879, lr_0 = 6.2833e-04
Loss = 1.2681e-01, PNorm = 55.7839, GNorm = 1.5947, lr_0 = 6.2789e-04
Loss = 1.2922e-01, PNorm = 55.8037, GNorm = 0.7753, lr_0 = 6.2746e-04
Loss = 1.3727e-01, PNorm = 55.8190, GNorm = 1.1513, lr_0 = 6.2703e-04
Loss = 1.1486e-01, PNorm = 55.8380, GNorm = 0.7751, lr_0 = 6.2661e-04
Loss = 1.1621e-01, PNorm = 55.8518, GNorm = 0.9758, lr_0 = 6.2618e-04
Loss = 1.2132e-01, PNorm = 55.8612, GNorm = 0.9041, lr_0 = 6.2575e-04
Loss = 1.2536e-01, PNorm = 55.8698, GNorm = 0.9801, lr_0 = 6.2532e-04
Loss = 1.1659e-01, PNorm = 55.8830, GNorm = 0.9816, lr_0 = 6.2489e-04
Loss = 1.1528e-01, PNorm = 55.9014, GNorm = 0.4665, lr_0 = 6.2446e-04
Loss = 1.1319e-01, PNorm = 55.9141, GNorm = 1.2569, lr_0 = 6.2403e-04
Loss = 1.1281e-01, PNorm = 55.9240, GNorm = 0.9529, lr_0 = 6.2361e-04
Loss = 1.0942e-01, PNorm = 55.9330, GNorm = 0.8752, lr_0 = 6.2318e-04
Loss = 1.1570e-01, PNorm = 55.9430, GNorm = 0.8963, lr_0 = 6.2275e-04
Loss = 1.2461e-01, PNorm = 55.9547, GNorm = 0.6871, lr_0 = 6.2233e-04
Loss = 1.0659e-01, PNorm = 55.9670, GNorm = 0.5934, lr_0 = 6.2190e-04
Loss = 1.0225e-01, PNorm = 55.9832, GNorm = 1.4226, lr_0 = 6.2147e-04
Loss = 1.2396e-01, PNorm = 56.0034, GNorm = 1.6367, lr_0 = 6.2105e-04
Loss = 1.1346e-01, PNorm = 56.0210, GNorm = 0.7920, lr_0 = 6.2062e-04
Loss = 1.1023e-01, PNorm = 56.0423, GNorm = 0.6091, lr_0 = 6.2020e-04
Loss = 1.1382e-01, PNorm = 56.0628, GNorm = 0.7007, lr_0 = 6.1977e-04
Loss = 1.1341e-01, PNorm = 56.0782, GNorm = 1.0842, lr_0 = 6.1935e-04
Loss = 1.0435e-01, PNorm = 56.0925, GNorm = 1.0339, lr_0 = 6.1892e-04
Loss = 1.2506e-01, PNorm = 56.1082, GNorm = 0.7529, lr_0 = 6.1850e-04
Loss = 1.2059e-01, PNorm = 56.1190, GNorm = 1.6143, lr_0 = 6.1808e-04
Loss = 1.2759e-01, PNorm = 56.1335, GNorm = 0.5371, lr_0 = 6.1765e-04
Loss = 1.2560e-01, PNorm = 56.1506, GNorm = 0.6123, lr_0 = 6.1723e-04
Loss = 1.1180e-01, PNorm = 56.1652, GNorm = 0.5434, lr_0 = 6.1681e-04
Loss = 1.1512e-01, PNorm = 56.1863, GNorm = 0.9463, lr_0 = 6.1638e-04
Loss = 1.1679e-01, PNorm = 56.1993, GNorm = 0.6545, lr_0 = 6.1596e-04
Loss = 1.0596e-01, PNorm = 56.2122, GNorm = 0.6346, lr_0 = 6.1554e-04
Loss = 1.2777e-01, PNorm = 56.2250, GNorm = 1.1058, lr_0 = 6.1512e-04
Loss = 1.2341e-01, PNorm = 56.2411, GNorm = 1.0588, lr_0 = 6.1470e-04
Loss = 1.1434e-01, PNorm = 56.2549, GNorm = 1.7016, lr_0 = 6.1428e-04
Loss = 1.2965e-01, PNorm = 56.2682, GNorm = 1.6551, lr_0 = 6.1385e-04
Loss = 1.3028e-01, PNorm = 56.2928, GNorm = 1.7455, lr_0 = 6.1343e-04
Loss = 1.1803e-01, PNorm = 56.3192, GNorm = 0.8792, lr_0 = 6.1301e-04
Loss = 1.1475e-01, PNorm = 56.3362, GNorm = 1.1710, lr_0 = 6.1259e-04
Loss = 1.1049e-01, PNorm = 56.3464, GNorm = 0.8242, lr_0 = 6.1217e-04
Loss = 1.0553e-01, PNorm = 56.3604, GNorm = 0.6022, lr_0 = 6.1175e-04
Loss = 1.1191e-01, PNorm = 56.3752, GNorm = 1.4277, lr_0 = 6.1134e-04
Loss = 1.2157e-01, PNorm = 56.3852, GNorm = 0.9472, lr_0 = 6.1092e-04
Loss = 1.3461e-01, PNorm = 56.3985, GNorm = 2.1307, lr_0 = 6.1050e-04
Validation mae = 0.428161
Epoch 8
Loss = 1.0815e-01, PNorm = 56.4163, GNorm = 1.5260, lr_0 = 6.1008e-04
Loss = 1.1979e-01, PNorm = 56.4385, GNorm = 0.9674, lr_0 = 6.0966e-04
Loss = 1.1237e-01, PNorm = 56.4563, GNorm = 0.6170, lr_0 = 6.0924e-04
Loss = 1.2069e-01, PNorm = 56.4693, GNorm = 0.5882, lr_0 = 6.0883e-04
Loss = 1.2555e-01, PNorm = 56.4864, GNorm = 0.6982, lr_0 = 6.0841e-04
Loss = 1.0319e-01, PNorm = 56.5070, GNorm = 0.6216, lr_0 = 6.0799e-04
Loss = 1.2580e-01, PNorm = 56.5251, GNorm = 0.8769, lr_0 = 6.0758e-04
Loss = 1.0750e-01, PNorm = 56.5446, GNorm = 1.0296, lr_0 = 6.0716e-04
Loss = 1.0375e-01, PNorm = 56.5647, GNorm = 0.7436, lr_0 = 6.0674e-04
Loss = 1.1271e-01, PNorm = 56.5809, GNorm = 0.8446, lr_0 = 6.0633e-04
Loss = 1.0244e-01, PNorm = 56.5948, GNorm = 0.6618, lr_0 = 6.0591e-04
Loss = 1.2023e-01, PNorm = 56.6052, GNorm = 0.9308, lr_0 = 6.0550e-04
Loss = 1.2077e-01, PNorm = 56.6216, GNorm = 1.1146, lr_0 = 6.0508e-04
Loss = 9.6773e-02, PNorm = 56.6328, GNorm = 0.8690, lr_0 = 6.0467e-04
Loss = 9.3908e-02, PNorm = 56.6430, GNorm = 0.8627, lr_0 = 6.0425e-04
Loss = 1.0164e-01, PNorm = 56.6573, GNorm = 0.8086, lr_0 = 6.0384e-04
Loss = 1.1717e-01, PNorm = 56.6687, GNorm = 0.6409, lr_0 = 6.0343e-04
Loss = 9.9092e-02, PNorm = 56.6878, GNorm = 1.6066, lr_0 = 6.0301e-04
Loss = 1.1119e-01, PNorm = 56.7118, GNorm = 1.5136, lr_0 = 6.0260e-04
Loss = 1.0974e-01, PNorm = 56.7340, GNorm = 1.7334, lr_0 = 6.0219e-04
Loss = 1.1155e-01, PNorm = 56.7524, GNorm = 0.5930, lr_0 = 6.0178e-04
Loss = 1.2385e-01, PNorm = 56.7720, GNorm = 1.0707, lr_0 = 6.0136e-04
Loss = 1.1218e-01, PNorm = 56.7956, GNorm = 1.4858, lr_0 = 6.0095e-04
Loss = 1.2058e-01, PNorm = 56.8165, GNorm = 1.2428, lr_0 = 6.0054e-04
Loss = 1.0682e-01, PNorm = 56.8290, GNorm = 0.7886, lr_0 = 6.0013e-04
Loss = 1.0273e-01, PNorm = 56.8439, GNorm = 0.8170, lr_0 = 5.9972e-04
Loss = 1.1072e-01, PNorm = 56.8588, GNorm = 0.8323, lr_0 = 5.9931e-04
Loss = 1.0144e-01, PNorm = 56.8717, GNorm = 0.8142, lr_0 = 5.9890e-04
Loss = 9.8136e-02, PNorm = 56.8827, GNorm = 1.1067, lr_0 = 5.9849e-04
Loss = 1.1666e-01, PNorm = 56.8993, GNorm = 1.5770, lr_0 = 5.9808e-04
Loss = 9.1648e-02, PNorm = 56.9141, GNorm = 0.7243, lr_0 = 5.9767e-04
Loss = 1.0597e-01, PNorm = 56.9230, GNorm = 0.5800, lr_0 = 5.9726e-04
Loss = 1.1100e-01, PNorm = 56.9388, GNorm = 1.3672, lr_0 = 5.9685e-04
Loss = 1.1414e-01, PNorm = 56.9533, GNorm = 0.5892, lr_0 = 5.9644e-04
Loss = 1.1522e-01, PNorm = 56.9668, GNorm = 1.3154, lr_0 = 5.9603e-04
Loss = 1.1453e-01, PNorm = 56.9797, GNorm = 0.6043, lr_0 = 5.9562e-04
Loss = 9.8963e-02, PNorm = 56.9925, GNorm = 0.6164, lr_0 = 5.9521e-04
Loss = 1.0565e-01, PNorm = 57.0077, GNorm = 0.8017, lr_0 = 5.9481e-04
Loss = 1.0270e-01, PNorm = 57.0201, GNorm = 1.3888, lr_0 = 5.9440e-04
Loss = 1.1421e-01, PNorm = 57.0353, GNorm = 0.6529, lr_0 = 5.9399e-04
Loss = 1.1168e-01, PNorm = 57.0473, GNorm = 1.0641, lr_0 = 5.9358e-04
Loss = 1.0652e-01, PNorm = 57.0584, GNorm = 0.7948, lr_0 = 5.9318e-04
Loss = 1.0329e-01, PNorm = 57.0748, GNorm = 0.5374, lr_0 = 5.9277e-04
Loss = 1.1949e-01, PNorm = 57.0914, GNorm = 1.0259, lr_0 = 5.9236e-04
Loss = 1.1616e-01, PNorm = 57.1066, GNorm = 0.7046, lr_0 = 5.9196e-04
Loss = 1.0921e-01, PNorm = 57.1207, GNorm = 1.2855, lr_0 = 5.9155e-04
Loss = 1.1111e-01, PNorm = 57.1309, GNorm = 0.7008, lr_0 = 5.9115e-04
Loss = 1.0798e-01, PNorm = 57.1428, GNorm = 0.5860, lr_0 = 5.9074e-04
Loss = 1.3070e-01, PNorm = 57.1553, GNorm = 1.3262, lr_0 = 5.9034e-04
Loss = 1.0191e-01, PNorm = 57.1739, GNorm = 1.1295, lr_0 = 5.8993e-04
Loss = 1.0965e-01, PNorm = 57.1898, GNorm = 0.7109, lr_0 = 5.8953e-04
Loss = 1.0905e-01, PNorm = 57.2032, GNorm = 1.4443, lr_0 = 5.8913e-04
Loss = 1.1501e-01, PNorm = 57.2166, GNorm = 1.4076, lr_0 = 5.8872e-04
Loss = 1.1031e-01, PNorm = 57.2329, GNorm = 0.5228, lr_0 = 5.8832e-04
Loss = 1.0271e-01, PNorm = 57.2508, GNorm = 0.7440, lr_0 = 5.8792e-04
Loss = 9.8811e-02, PNorm = 57.2675, GNorm = 0.9371, lr_0 = 5.8751e-04
Loss = 1.0059e-01, PNorm = 57.2810, GNorm = 0.7922, lr_0 = 5.8711e-04
Loss = 1.1906e-01, PNorm = 57.2994, GNorm = 1.2567, lr_0 = 5.8671e-04
Loss = 1.2175e-01, PNorm = 57.3144, GNorm = 0.5048, lr_0 = 5.8631e-04
Loss = 1.0484e-01, PNorm = 57.3298, GNorm = 0.8319, lr_0 = 5.8591e-04
Loss = 1.0745e-01, PNorm = 57.3449, GNorm = 0.5337, lr_0 = 5.8550e-04
Loss = 1.1067e-01, PNorm = 57.3610, GNorm = 1.0560, lr_0 = 5.8510e-04
Loss = 1.1949e-01, PNorm = 57.3803, GNorm = 0.7621, lr_0 = 5.8470e-04
Loss = 1.1477e-01, PNorm = 57.3943, GNorm = 0.8848, lr_0 = 5.8430e-04
Loss = 1.0380e-01, PNorm = 57.4056, GNorm = 0.4944, lr_0 = 5.8390e-04
Loss = 1.1049e-01, PNorm = 57.4234, GNorm = 0.6886, lr_0 = 5.8350e-04
Loss = 1.2334e-01, PNorm = 57.4437, GNorm = 0.7262, lr_0 = 5.8310e-04
Loss = 1.0741e-01, PNorm = 57.4607, GNorm = 0.5874, lr_0 = 5.8270e-04
Loss = 1.0707e-01, PNorm = 57.4746, GNorm = 0.6189, lr_0 = 5.8230e-04
Loss = 1.0760e-01, PNorm = 57.4892, GNorm = 0.5493, lr_0 = 5.8190e-04
Loss = 1.2285e-01, PNorm = 57.5055, GNorm = 1.2058, lr_0 = 5.8151e-04
Loss = 9.6742e-02, PNorm = 57.5183, GNorm = 1.1059, lr_0 = 5.8111e-04
Loss = 1.1264e-01, PNorm = 57.5262, GNorm = 0.5405, lr_0 = 5.8071e-04
Loss = 1.2568e-01, PNorm = 57.5421, GNorm = 1.0424, lr_0 = 5.8031e-04
Loss = 1.1708e-01, PNorm = 57.5565, GNorm = 0.7526, lr_0 = 5.7991e-04
Loss = 9.7840e-02, PNorm = 57.5659, GNorm = 1.0154, lr_0 = 5.7952e-04
Loss = 1.2009e-01, PNorm = 57.5791, GNorm = 0.6264, lr_0 = 5.7912e-04
Loss = 1.1410e-01, PNorm = 57.5975, GNorm = 0.6004, lr_0 = 5.7872e-04
Loss = 1.0603e-01, PNorm = 57.6129, GNorm = 0.4809, lr_0 = 5.7833e-04
Loss = 1.1587e-01, PNorm = 57.6245, GNorm = 1.6198, lr_0 = 5.7793e-04
Loss = 1.0484e-01, PNorm = 57.6376, GNorm = 0.6402, lr_0 = 5.7753e-04
Loss = 1.1049e-01, PNorm = 57.6502, GNorm = 1.0382, lr_0 = 5.7714e-04
Loss = 1.2645e-01, PNorm = 57.6692, GNorm = 1.3206, lr_0 = 5.7674e-04
Loss = 1.2852e-01, PNorm = 57.6859, GNorm = 0.8352, lr_0 = 5.7635e-04
Loss = 1.1593e-01, PNorm = 57.7041, GNorm = 0.7388, lr_0 = 5.7595e-04
Loss = 9.7996e-02, PNorm = 57.7197, GNorm = 0.5669, lr_0 = 5.7556e-04
Loss = 1.0764e-01, PNorm = 57.7301, GNorm = 0.7715, lr_0 = 5.7516e-04
Loss = 9.7263e-02, PNorm = 57.7377, GNorm = 0.5220, lr_0 = 5.7477e-04
Loss = 1.0155e-01, PNorm = 57.7498, GNorm = 0.5247, lr_0 = 5.7438e-04
Loss = 1.0631e-01, PNorm = 57.7620, GNorm = 1.1037, lr_0 = 5.7398e-04
Loss = 9.4969e-02, PNorm = 57.7685, GNorm = 0.8523, lr_0 = 5.7359e-04
Loss = 1.0865e-01, PNorm = 57.7751, GNorm = 1.4714, lr_0 = 5.7320e-04
Loss = 1.0983e-01, PNorm = 57.7835, GNorm = 0.5802, lr_0 = 5.7280e-04
Loss = 1.0444e-01, PNorm = 57.7982, GNorm = 1.4224, lr_0 = 5.7241e-04
Loss = 9.6444e-02, PNorm = 57.8115, GNorm = 1.0206, lr_0 = 5.7202e-04
Loss = 1.1462e-01, PNorm = 57.8217, GNorm = 0.6608, lr_0 = 5.7163e-04
Loss = 1.0239e-01, PNorm = 57.8300, GNorm = 0.7968, lr_0 = 5.7124e-04
Loss = 1.3244e-01, PNorm = 57.8406, GNorm = 1.0659, lr_0 = 5.7084e-04
Loss = 1.2003e-01, PNorm = 57.8566, GNorm = 0.5662, lr_0 = 5.7045e-04
Loss = 9.9348e-02, PNorm = 57.8752, GNorm = 0.4024, lr_0 = 5.7006e-04
Loss = 1.2250e-01, PNorm = 57.8910, GNorm = 0.8270, lr_0 = 5.6967e-04
Loss = 1.0186e-01, PNorm = 57.9014, GNorm = 0.6925, lr_0 = 5.6928e-04
Loss = 9.7391e-02, PNorm = 57.9090, GNorm = 0.7342, lr_0 = 5.6889e-04
Loss = 1.2812e-01, PNorm = 57.9203, GNorm = 0.7275, lr_0 = 5.6850e-04
Loss = 1.0599e-01, PNorm = 57.9309, GNorm = 0.6253, lr_0 = 5.6811e-04
Loss = 1.2159e-01, PNorm = 57.9442, GNorm = 0.4649, lr_0 = 5.6772e-04
Loss = 1.0747e-01, PNorm = 57.9576, GNorm = 1.2133, lr_0 = 5.6733e-04
Loss = 1.1332e-01, PNorm = 57.9759, GNorm = 0.6493, lr_0 = 5.6695e-04
Loss = 1.1293e-01, PNorm = 57.9914, GNorm = 0.8796, lr_0 = 5.6656e-04
Loss = 9.5064e-02, PNorm = 58.0034, GNorm = 0.8082, lr_0 = 5.6617e-04
Loss = 1.0657e-01, PNorm = 58.0107, GNorm = 0.7542, lr_0 = 5.6578e-04
Loss = 1.0746e-01, PNorm = 58.0243, GNorm = 1.0497, lr_0 = 5.6539e-04
Loss = 1.0681e-01, PNorm = 58.0373, GNorm = 0.7426, lr_0 = 5.6501e-04
Loss = 1.1983e-01, PNorm = 58.0518, GNorm = 0.9772, lr_0 = 5.6462e-04
Loss = 1.1110e-01, PNorm = 58.0668, GNorm = 1.1111, lr_0 = 5.6423e-04
Loss = 9.9778e-02, PNorm = 58.0803, GNorm = 0.6962, lr_0 = 5.6385e-04
Loss = 1.0390e-01, PNorm = 58.0935, GNorm = 0.8863, lr_0 = 5.6346e-04
Loss = 1.1631e-01, PNorm = 58.1047, GNorm = 0.6289, lr_0 = 5.6307e-04
Loss = 1.1435e-01, PNorm = 58.1151, GNorm = 1.2224, lr_0 = 5.6269e-04
Loss = 1.1303e-01, PNorm = 58.1247, GNorm = 0.6230, lr_0 = 5.6230e-04
Validation mae = 0.407145
Epoch 9
Loss = 8.9757e-02, PNorm = 58.1367, GNorm = 0.5727, lr_0 = 5.6192e-04
Loss = 9.5098e-02, PNorm = 58.1543, GNorm = 0.6259, lr_0 = 5.6153e-04
Loss = 8.6397e-02, PNorm = 58.1718, GNorm = 0.7696, lr_0 = 5.6115e-04
Loss = 1.0009e-01, PNorm = 58.1898, GNorm = 0.6864, lr_0 = 5.6076e-04
Loss = 9.4018e-02, PNorm = 58.2009, GNorm = 0.5172, lr_0 = 5.6038e-04
Loss = 9.7074e-02, PNorm = 58.2196, GNorm = 0.6503, lr_0 = 5.6000e-04
Loss = 9.7453e-02, PNorm = 58.2353, GNorm = 0.7631, lr_0 = 5.5961e-04
Loss = 9.7974e-02, PNorm = 58.2503, GNorm = 0.7163, lr_0 = 5.5923e-04
Loss = 9.3857e-02, PNorm = 58.2677, GNorm = 0.8410, lr_0 = 5.5885e-04
Loss = 1.0335e-01, PNorm = 58.2829, GNorm = 1.0447, lr_0 = 5.5846e-04
Loss = 9.6241e-02, PNorm = 58.2989, GNorm = 1.2213, lr_0 = 5.5808e-04
Loss = 1.0008e-01, PNorm = 58.3152, GNorm = 1.1303, lr_0 = 5.5770e-04
Loss = 9.4200e-02, PNorm = 58.3253, GNorm = 0.7908, lr_0 = 5.5732e-04
Loss = 8.7200e-02, PNorm = 58.3400, GNorm = 0.8157, lr_0 = 5.5693e-04
Loss = 1.0020e-01, PNorm = 58.3534, GNorm = 0.4739, lr_0 = 5.5655e-04
Loss = 1.1054e-01, PNorm = 58.3648, GNorm = 0.9389, lr_0 = 5.5617e-04
Loss = 1.0884e-01, PNorm = 58.3736, GNorm = 0.8001, lr_0 = 5.5579e-04
Loss = 9.9541e-02, PNorm = 58.3850, GNorm = 1.0908, lr_0 = 5.5541e-04
Loss = 9.8777e-02, PNorm = 58.3972, GNorm = 0.6723, lr_0 = 5.5503e-04
Loss = 1.0432e-01, PNorm = 58.4097, GNorm = 1.1272, lr_0 = 5.5465e-04
Loss = 9.8611e-02, PNorm = 58.4223, GNorm = 0.4401, lr_0 = 5.5427e-04
Loss = 8.9242e-02, PNorm = 58.4344, GNorm = 0.6016, lr_0 = 5.5389e-04
Loss = 9.2026e-02, PNorm = 58.4491, GNorm = 0.8967, lr_0 = 5.5351e-04
Loss = 8.2258e-02, PNorm = 58.4632, GNorm = 0.5973, lr_0 = 5.5313e-04
Loss = 8.9366e-02, PNorm = 58.4728, GNorm = 0.7537, lr_0 = 5.5275e-04
Loss = 1.1123e-01, PNorm = 58.4855, GNorm = 0.8341, lr_0 = 5.5237e-04
Loss = 9.5815e-02, PNorm = 58.5003, GNorm = 0.9624, lr_0 = 5.5199e-04
Loss = 9.6939e-02, PNorm = 58.5166, GNorm = 0.9573, lr_0 = 5.5162e-04
Loss = 9.7412e-02, PNorm = 58.5295, GNorm = 0.9362, lr_0 = 5.5124e-04
Loss = 9.5157e-02, PNorm = 58.5414, GNorm = 0.4783, lr_0 = 5.5086e-04
Loss = 9.3261e-02, PNorm = 58.5560, GNorm = 1.1369, lr_0 = 5.5048e-04
Loss = 1.1348e-01, PNorm = 58.5755, GNorm = 0.4206, lr_0 = 5.5011e-04
Loss = 9.0423e-02, PNorm = 58.5878, GNorm = 0.9261, lr_0 = 5.4973e-04
Loss = 9.3304e-02, PNorm = 58.5991, GNorm = 0.9900, lr_0 = 5.4935e-04
Loss = 1.0775e-01, PNorm = 58.6161, GNorm = 0.9014, lr_0 = 5.4898e-04
Loss = 1.1729e-01, PNorm = 58.6306, GNorm = 0.9092, lr_0 = 5.4860e-04
Loss = 9.7490e-02, PNorm = 58.6466, GNorm = 0.6518, lr_0 = 5.4822e-04
Loss = 9.1894e-02, PNorm = 58.6535, GNorm = 0.5241, lr_0 = 5.4785e-04
Loss = 9.9238e-02, PNorm = 58.6685, GNorm = 0.4510, lr_0 = 5.4747e-04
Loss = 9.1204e-02, PNorm = 58.6850, GNorm = 0.5886, lr_0 = 5.4710e-04
Loss = 9.0404e-02, PNorm = 58.6942, GNorm = 0.5975, lr_0 = 5.4672e-04
Loss = 9.6815e-02, PNorm = 58.7071, GNorm = 1.1470, lr_0 = 5.4635e-04
Loss = 9.3652e-02, PNorm = 58.7218, GNorm = 1.0677, lr_0 = 5.4597e-04
Loss = 9.1634e-02, PNorm = 58.7347, GNorm = 0.8151, lr_0 = 5.4560e-04
Loss = 1.0451e-01, PNorm = 58.7455, GNorm = 1.0756, lr_0 = 5.4523e-04
Loss = 8.9967e-02, PNorm = 58.7604, GNorm = 0.6414, lr_0 = 5.4485e-04
Loss = 1.1022e-01, PNorm = 58.7731, GNorm = 0.5870, lr_0 = 5.4448e-04
Loss = 1.1846e-01, PNorm = 58.7885, GNorm = 0.6761, lr_0 = 5.4411e-04
Loss = 9.4234e-02, PNorm = 58.8010, GNorm = 0.7204, lr_0 = 5.4373e-04
Loss = 1.0185e-01, PNorm = 58.8193, GNorm = 0.6770, lr_0 = 5.4336e-04
Loss = 1.0775e-01, PNorm = 58.8384, GNorm = 0.6569, lr_0 = 5.4299e-04
Loss = 1.0994e-01, PNorm = 58.8509, GNorm = 1.4543, lr_0 = 5.4262e-04
Loss = 9.8519e-02, PNorm = 58.8656, GNorm = 0.8651, lr_0 = 5.4225e-04
Loss = 9.4577e-02, PNorm = 58.8804, GNorm = 0.7187, lr_0 = 5.4187e-04
Loss = 1.1121e-01, PNorm = 58.9002, GNorm = 0.9474, lr_0 = 5.4150e-04
Loss = 9.8720e-02, PNorm = 58.9141, GNorm = 0.5289, lr_0 = 5.4113e-04
Loss = 8.2428e-02, PNorm = 58.9234, GNorm = 0.6935, lr_0 = 5.4076e-04
Loss = 8.2163e-02, PNorm = 58.9335, GNorm = 0.5260, lr_0 = 5.4039e-04
Loss = 1.0015e-01, PNorm = 58.9451, GNorm = 1.0591, lr_0 = 5.4002e-04
Loss = 8.9322e-02, PNorm = 58.9562, GNorm = 0.6231, lr_0 = 5.3965e-04
Loss = 8.2076e-02, PNorm = 58.9673, GNorm = 1.3334, lr_0 = 5.3928e-04
Loss = 1.0608e-01, PNorm = 58.9822, GNorm = 0.8982, lr_0 = 5.3891e-04
Loss = 9.3608e-02, PNorm = 58.9947, GNorm = 0.6326, lr_0 = 5.3854e-04
Loss = 1.0056e-01, PNorm = 59.0138, GNorm = 0.5152, lr_0 = 5.3817e-04
Loss = 1.0071e-01, PNorm = 59.0275, GNorm = 0.8744, lr_0 = 5.3781e-04
Loss = 1.1998e-01, PNorm = 59.0400, GNorm = 0.9035, lr_0 = 5.3744e-04
Loss = 1.2815e-01, PNorm = 59.0508, GNorm = 0.5983, lr_0 = 5.3707e-04
Loss = 1.1541e-01, PNorm = 59.0659, GNorm = 0.7619, lr_0 = 5.3670e-04
Loss = 8.4591e-02, PNorm = 59.0775, GNorm = 0.4419, lr_0 = 5.3633e-04
Loss = 1.0150e-01, PNorm = 59.0931, GNorm = 0.5455, lr_0 = 5.3597e-04
Loss = 9.9905e-02, PNorm = 59.1109, GNorm = 0.5257, lr_0 = 5.3560e-04
Loss = 9.8234e-02, PNorm = 59.1229, GNorm = 0.7236, lr_0 = 5.3523e-04
Loss = 1.0146e-01, PNorm = 59.1357, GNorm = 0.4570, lr_0 = 5.3486e-04
Loss = 1.0159e-01, PNorm = 59.1503, GNorm = 0.7930, lr_0 = 5.3450e-04
Loss = 9.4088e-02, PNorm = 59.1647, GNorm = 0.5553, lr_0 = 5.3413e-04
Loss = 9.5964e-02, PNorm = 59.1753, GNorm = 0.8360, lr_0 = 5.3377e-04
Loss = 9.5727e-02, PNorm = 59.1871, GNorm = 0.5628, lr_0 = 5.3340e-04
Loss = 1.0358e-01, PNorm = 59.1991, GNorm = 1.4191, lr_0 = 5.3304e-04
Loss = 1.0081e-01, PNorm = 59.2172, GNorm = 0.6671, lr_0 = 5.3267e-04
Loss = 1.0592e-01, PNorm = 59.2319, GNorm = 1.1334, lr_0 = 5.3231e-04
Loss = 1.0035e-01, PNorm = 59.2413, GNorm = 0.7799, lr_0 = 5.3194e-04
Loss = 1.1655e-01, PNorm = 59.2504, GNorm = 0.8152, lr_0 = 5.3158e-04
Loss = 1.0827e-01, PNorm = 59.2602, GNorm = 0.7311, lr_0 = 5.3121e-04
Loss = 1.2036e-01, PNorm = 59.2759, GNorm = 1.8447, lr_0 = 5.3085e-04
Loss = 1.0143e-01, PNorm = 59.2859, GNorm = 1.1426, lr_0 = 5.3048e-04
Loss = 1.0418e-01, PNorm = 59.2915, GNorm = 0.9426, lr_0 = 5.3012e-04
Loss = 1.1016e-01, PNorm = 59.3061, GNorm = 1.5229, lr_0 = 5.2976e-04
Loss = 1.1454e-01, PNorm = 59.3143, GNorm = 0.8143, lr_0 = 5.2939e-04
Loss = 1.0915e-01, PNorm = 59.3259, GNorm = 0.7375, lr_0 = 5.2903e-04
Loss = 1.2101e-01, PNorm = 59.3420, GNorm = 1.2442, lr_0 = 5.2867e-04
Loss = 9.9287e-02, PNorm = 59.3579, GNorm = 0.5969, lr_0 = 5.2831e-04
Loss = 8.7323e-02, PNorm = 59.3671, GNorm = 0.6480, lr_0 = 5.2795e-04
Loss = 1.1309e-01, PNorm = 59.3747, GNorm = 0.5582, lr_0 = 5.2758e-04
Loss = 1.1066e-01, PNorm = 59.3876, GNorm = 0.7127, lr_0 = 5.2722e-04
Loss = 1.0117e-01, PNorm = 59.4034, GNorm = 0.4585, lr_0 = 5.2686e-04
Loss = 9.8307e-02, PNorm = 59.4160, GNorm = 0.7221, lr_0 = 5.2650e-04
Loss = 9.9633e-02, PNorm = 59.4263, GNorm = 0.5239, lr_0 = 5.2614e-04
Loss = 9.7727e-02, PNorm = 59.4348, GNorm = 1.1164, lr_0 = 5.2578e-04
Loss = 1.2631e-01, PNorm = 59.4419, GNorm = 0.8902, lr_0 = 5.2542e-04
Loss = 8.7088e-02, PNorm = 59.4531, GNorm = 0.7515, lr_0 = 5.2506e-04
Loss = 1.1729e-01, PNorm = 59.4661, GNorm = 1.5895, lr_0 = 5.2470e-04
Loss = 1.0037e-01, PNorm = 59.4765, GNorm = 0.8612, lr_0 = 5.2434e-04
Loss = 1.1457e-01, PNorm = 59.4893, GNorm = 0.7364, lr_0 = 5.2398e-04
Loss = 1.1729e-01, PNorm = 59.5074, GNorm = 1.4551, lr_0 = 5.2362e-04
Loss = 8.5275e-02, PNorm = 59.5247, GNorm = 0.5546, lr_0 = 5.2326e-04
Loss = 1.0455e-01, PNorm = 59.5362, GNorm = 0.7861, lr_0 = 5.2290e-04
Loss = 9.8503e-02, PNorm = 59.5490, GNorm = 1.2226, lr_0 = 5.2255e-04
Loss = 1.1626e-01, PNorm = 59.5557, GNorm = 0.9536, lr_0 = 5.2219e-04
Loss = 9.0278e-02, PNorm = 59.5666, GNorm = 0.9628, lr_0 = 5.2183e-04
Loss = 1.2568e-01, PNorm = 59.5768, GNorm = 2.3303, lr_0 = 5.2147e-04
Loss = 9.9440e-02, PNorm = 59.5892, GNorm = 1.2478, lr_0 = 5.2112e-04
Loss = 9.7406e-02, PNorm = 59.6025, GNorm = 0.6813, lr_0 = 5.2076e-04
Loss = 1.0282e-01, PNorm = 59.6151, GNorm = 0.6886, lr_0 = 5.2040e-04
Loss = 1.1468e-01, PNorm = 59.6290, GNorm = 0.8804, lr_0 = 5.2005e-04
Loss = 1.1042e-01, PNorm = 59.6433, GNorm = 0.8812, lr_0 = 5.1969e-04
Loss = 9.7635e-02, PNorm = 59.6502, GNorm = 0.7948, lr_0 = 5.1933e-04
Loss = 1.1451e-01, PNorm = 59.6583, GNorm = 0.6629, lr_0 = 5.1898e-04
Loss = 9.5630e-02, PNorm = 59.6710, GNorm = 0.5755, lr_0 = 5.1862e-04
Loss = 9.0351e-02, PNorm = 59.6811, GNorm = 0.5199, lr_0 = 5.1827e-04
Loss = 1.1078e-01, PNorm = 59.6927, GNorm = 1.1517, lr_0 = 5.1791e-04
Validation mae = 0.418695
Epoch 10
Loss = 9.6462e-02, PNorm = 59.7066, GNorm = 0.9370, lr_0 = 5.1756e-04
Loss = 8.7034e-02, PNorm = 59.7192, GNorm = 1.4276, lr_0 = 5.1720e-04
Loss = 8.9114e-02, PNorm = 59.7332, GNorm = 0.7555, lr_0 = 5.1685e-04
Loss = 9.7128e-02, PNorm = 59.7513, GNorm = 0.8259, lr_0 = 5.1649e-04
Loss = 8.6458e-02, PNorm = 59.7659, GNorm = 0.9030, lr_0 = 5.1614e-04
Loss = 8.8464e-02, PNorm = 59.7780, GNorm = 0.8881, lr_0 = 5.1579e-04
Loss = 9.0883e-02, PNorm = 59.7892, GNorm = 1.2317, lr_0 = 5.1543e-04
Loss = 9.0913e-02, PNorm = 59.7961, GNorm = 1.0876, lr_0 = 5.1508e-04
Loss = 8.3881e-02, PNorm = 59.8109, GNorm = 0.5042, lr_0 = 5.1473e-04
Loss = 7.8865e-02, PNorm = 59.8220, GNorm = 0.5445, lr_0 = 5.1437e-04
Loss = 9.4594e-02, PNorm = 59.8330, GNorm = 0.5630, lr_0 = 5.1402e-04
Loss = 1.0627e-01, PNorm = 59.8452, GNorm = 0.6250, lr_0 = 5.1367e-04
Loss = 9.0300e-02, PNorm = 59.8547, GNorm = 0.7297, lr_0 = 5.1332e-04
Loss = 7.5242e-02, PNorm = 59.8649, GNorm = 0.6912, lr_0 = 5.1297e-04
Loss = 8.3470e-02, PNorm = 59.8772, GNorm = 0.5009, lr_0 = 5.1262e-04
Loss = 8.7482e-02, PNorm = 59.8930, GNorm = 0.9918, lr_0 = 5.1226e-04
Loss = 9.3399e-02, PNorm = 59.9058, GNorm = 0.4996, lr_0 = 5.1191e-04
Loss = 8.8255e-02, PNorm = 59.9148, GNorm = 0.6425, lr_0 = 5.1156e-04
Loss = 8.8029e-02, PNorm = 59.9270, GNorm = 0.5650, lr_0 = 5.1121e-04
Loss = 7.6578e-02, PNorm = 59.9413, GNorm = 0.6906, lr_0 = 5.1086e-04
Loss = 8.7993e-02, PNorm = 59.9534, GNorm = 1.0462, lr_0 = 5.1051e-04
Loss = 7.9332e-02, PNorm = 59.9668, GNorm = 1.0145, lr_0 = 5.1016e-04
Loss = 1.1175e-01, PNorm = 59.9826, GNorm = 0.7789, lr_0 = 5.0981e-04
Loss = 7.5804e-02, PNorm = 59.9970, GNorm = 0.5352, lr_0 = 5.0946e-04
Loss = 8.3729e-02, PNorm = 60.0068, GNorm = 0.5673, lr_0 = 5.0911e-04
Loss = 8.5415e-02, PNorm = 60.0159, GNorm = 0.5153, lr_0 = 5.0877e-04
Loss = 8.5246e-02, PNorm = 60.0259, GNorm = 0.4762, lr_0 = 5.0842e-04
Loss = 7.5783e-02, PNorm = 60.0326, GNorm = 0.4276, lr_0 = 5.0807e-04
Loss = 9.3523e-02, PNorm = 60.0476, GNorm = 0.5971, lr_0 = 5.0772e-04
Loss = 9.5817e-02, PNorm = 60.0629, GNorm = 0.6490, lr_0 = 5.0737e-04
Loss = 9.6721e-02, PNorm = 60.0775, GNorm = 1.0474, lr_0 = 5.0703e-04
Loss = 9.9310e-02, PNorm = 60.0887, GNorm = 0.6116, lr_0 = 5.0668e-04
Loss = 8.7483e-02, PNorm = 60.0981, GNorm = 1.2474, lr_0 = 5.0633e-04
Loss = 1.1518e-01, PNorm = 60.1100, GNorm = 0.7844, lr_0 = 5.0598e-04
Loss = 9.6354e-02, PNorm = 60.1239, GNorm = 0.3988, lr_0 = 5.0564e-04
Loss = 9.1874e-02, PNorm = 60.1342, GNorm = 0.8176, lr_0 = 5.0529e-04
Loss = 8.6797e-02, PNorm = 60.1437, GNorm = 0.5172, lr_0 = 5.0494e-04
Loss = 1.0323e-01, PNorm = 60.1543, GNorm = 0.9872, lr_0 = 5.0460e-04
Loss = 1.0212e-01, PNorm = 60.1730, GNorm = 1.1305, lr_0 = 5.0425e-04
Loss = 1.0211e-01, PNorm = 60.1960, GNorm = 1.0204, lr_0 = 5.0391e-04
Loss = 9.6354e-02, PNorm = 60.2135, GNorm = 1.7207, lr_0 = 5.0356e-04
Loss = 1.2198e-01, PNorm = 60.2334, GNorm = 1.5366, lr_0 = 5.0322e-04
Loss = 9.4920e-02, PNorm = 60.2533, GNorm = 1.3990, lr_0 = 5.0287e-04
Loss = 9.7621e-02, PNorm = 60.2726, GNorm = 0.6454, lr_0 = 5.0253e-04
Loss = 8.3060e-02, PNorm = 60.2909, GNorm = 0.9829, lr_0 = 5.0218e-04
Loss = 9.8948e-02, PNorm = 60.2990, GNorm = 0.5777, lr_0 = 5.0184e-04
Loss = 9.6039e-02, PNorm = 60.3085, GNorm = 1.0341, lr_0 = 5.0150e-04
Loss = 9.2015e-02, PNorm = 60.3212, GNorm = 0.5034, lr_0 = 5.0115e-04
Loss = 9.5514e-02, PNorm = 60.3280, GNorm = 0.8856, lr_0 = 5.0081e-04
Loss = 9.4627e-02, PNorm = 60.3388, GNorm = 0.7156, lr_0 = 5.0047e-04
Loss = 9.0671e-02, PNorm = 60.3481, GNorm = 0.8256, lr_0 = 5.0012e-04
Loss = 8.2693e-02, PNorm = 60.3605, GNorm = 0.5846, lr_0 = 4.9978e-04
Loss = 9.1056e-02, PNorm = 60.3698, GNorm = 0.7796, lr_0 = 4.9944e-04
Loss = 9.0797e-02, PNorm = 60.3799, GNorm = 0.4348, lr_0 = 4.9910e-04
Loss = 9.3644e-02, PNorm = 60.3906, GNorm = 0.4583, lr_0 = 4.9875e-04
Loss = 8.6342e-02, PNorm = 60.4017, GNorm = 0.5978, lr_0 = 4.9841e-04
Loss = 9.5823e-02, PNorm = 60.4115, GNorm = 0.5335, lr_0 = 4.9807e-04
Loss = 8.8993e-02, PNorm = 60.4247, GNorm = 0.8385, lr_0 = 4.9773e-04
Loss = 9.1222e-02, PNorm = 60.4377, GNorm = 0.6475, lr_0 = 4.9739e-04
Loss = 9.8525e-02, PNorm = 60.4489, GNorm = 0.5030, lr_0 = 4.9705e-04
Loss = 8.7521e-02, PNorm = 60.4583, GNorm = 0.5252, lr_0 = 4.9671e-04
Loss = 9.4901e-02, PNorm = 60.4689, GNorm = 1.0625, lr_0 = 4.9637e-04
Loss = 1.0397e-01, PNorm = 60.4835, GNorm = 1.3473, lr_0 = 4.9603e-04
Loss = 8.4705e-02, PNorm = 60.4948, GNorm = 0.6576, lr_0 = 4.9569e-04
Loss = 9.4090e-02, PNorm = 60.5009, GNorm = 0.5952, lr_0 = 4.9535e-04
Loss = 9.7292e-02, PNorm = 60.5092, GNorm = 0.6564, lr_0 = 4.9501e-04
Loss = 8.8673e-02, PNorm = 60.5222, GNorm = 1.7936, lr_0 = 4.9467e-04
Loss = 1.0214e-01, PNorm = 60.5326, GNorm = 0.6027, lr_0 = 4.9433e-04
Loss = 8.9992e-02, PNorm = 60.5458, GNorm = 0.8264, lr_0 = 4.9399e-04
Loss = 9.5312e-02, PNorm = 60.5585, GNorm = 0.4569, lr_0 = 4.9365e-04
Loss = 8.8797e-02, PNorm = 60.5696, GNorm = 0.5700, lr_0 = 4.9332e-04
Loss = 8.6349e-02, PNorm = 60.5797, GNorm = 0.5510, lr_0 = 4.9298e-04
Loss = 1.1248e-01, PNorm = 60.5901, GNorm = 0.8079, lr_0 = 4.9264e-04
Loss = 1.0156e-01, PNorm = 60.6014, GNorm = 0.7544, lr_0 = 4.9230e-04
Loss = 1.0356e-01, PNorm = 60.6152, GNorm = 0.7182, lr_0 = 4.9197e-04
Loss = 8.4053e-02, PNorm = 60.6319, GNorm = 0.9206, lr_0 = 4.9163e-04
Loss = 8.6299e-02, PNorm = 60.6462, GNorm = 1.1036, lr_0 = 4.9129e-04
Loss = 1.0556e-01, PNorm = 60.6564, GNorm = 1.3060, lr_0 = 4.9095e-04
Loss = 9.5850e-02, PNorm = 60.6640, GNorm = 0.6539, lr_0 = 4.9062e-04
Loss = 8.5608e-02, PNorm = 60.6747, GNorm = 0.4686, lr_0 = 4.9028e-04
Loss = 9.8682e-02, PNorm = 60.6853, GNorm = 0.6631, lr_0 = 4.8995e-04
Loss = 9.7104e-02, PNorm = 60.7012, GNorm = 0.6411, lr_0 = 4.8961e-04
Loss = 8.2684e-02, PNorm = 60.7145, GNorm = 0.6943, lr_0 = 4.8928e-04
Loss = 1.0012e-01, PNorm = 60.7251, GNorm = 0.5339, lr_0 = 4.8894e-04
Loss = 1.0430e-01, PNorm = 60.7340, GNorm = 1.1691, lr_0 = 4.8861e-04
Loss = 1.0479e-01, PNorm = 60.7500, GNorm = 0.8941, lr_0 = 4.8827e-04
Loss = 1.1082e-01, PNorm = 60.7682, GNorm = 0.6796, lr_0 = 4.8794e-04
Loss = 9.9480e-02, PNorm = 60.7811, GNorm = 0.5196, lr_0 = 4.8760e-04
Loss = 1.0249e-01, PNorm = 60.7961, GNorm = 0.7430, lr_0 = 4.8727e-04
Loss = 9.3824e-02, PNorm = 60.8099, GNorm = 0.7763, lr_0 = 4.8693e-04
Loss = 9.5531e-02, PNorm = 60.8196, GNorm = 1.7975, lr_0 = 4.8660e-04
Loss = 8.7313e-02, PNorm = 60.8268, GNorm = 0.5387, lr_0 = 4.8627e-04
Loss = 1.0963e-01, PNorm = 60.8370, GNorm = 1.0140, lr_0 = 4.8593e-04
Loss = 9.5345e-02, PNorm = 60.8482, GNorm = 0.6323, lr_0 = 4.8560e-04
Loss = 8.3971e-02, PNorm = 60.8588, GNorm = 0.6605, lr_0 = 4.8527e-04
Loss = 8.6395e-02, PNorm = 60.8697, GNorm = 0.5882, lr_0 = 4.8494e-04
Loss = 8.1809e-02, PNorm = 60.8800, GNorm = 0.8953, lr_0 = 4.8460e-04
Loss = 8.2140e-02, PNorm = 60.8892, GNorm = 0.4909, lr_0 = 4.8427e-04
Loss = 8.5630e-02, PNorm = 60.9009, GNorm = 0.5463, lr_0 = 4.8394e-04
Loss = 9.4532e-02, PNorm = 60.9166, GNorm = 0.7330, lr_0 = 4.8361e-04
Loss = 9.1539e-02, PNorm = 60.9270, GNorm = 0.7870, lr_0 = 4.8328e-04
Loss = 9.2108e-02, PNorm = 60.9394, GNorm = 0.6692, lr_0 = 4.8295e-04
Loss = 9.7234e-02, PNorm = 60.9511, GNorm = 1.0240, lr_0 = 4.8262e-04
Loss = 9.2971e-02, PNorm = 60.9620, GNorm = 0.5499, lr_0 = 4.8228e-04
Loss = 1.0587e-01, PNorm = 60.9738, GNorm = 0.9762, lr_0 = 4.8195e-04
Loss = 1.1856e-01, PNorm = 60.9902, GNorm = 1.0073, lr_0 = 4.8162e-04
Loss = 9.8670e-02, PNorm = 61.0025, GNorm = 0.6496, lr_0 = 4.8129e-04
Loss = 1.0133e-01, PNorm = 61.0148, GNorm = 0.4330, lr_0 = 4.8096e-04
Loss = 1.0256e-01, PNorm = 61.0231, GNorm = 0.7804, lr_0 = 4.8064e-04
Loss = 1.0123e-01, PNorm = 61.0320, GNorm = 1.5128, lr_0 = 4.8031e-04
Loss = 8.4213e-02, PNorm = 61.0437, GNorm = 0.5721, lr_0 = 4.7998e-04
Loss = 1.0443e-01, PNorm = 61.0580, GNorm = 0.6182, lr_0 = 4.7965e-04
Loss = 9.3846e-02, PNorm = 61.0718, GNorm = 0.6274, lr_0 = 4.7932e-04
Loss = 9.6963e-02, PNorm = 61.0825, GNorm = 0.4946, lr_0 = 4.7899e-04
Loss = 1.0550e-01, PNorm = 61.0893, GNorm = 0.8545, lr_0 = 4.7866e-04
Loss = 1.0397e-01, PNorm = 61.1013, GNorm = 0.8540, lr_0 = 4.7833e-04
Loss = 1.0876e-01, PNorm = 61.1176, GNorm = 1.0854, lr_0 = 4.7801e-04
Loss = 1.1021e-01, PNorm = 61.1309, GNorm = 0.9341, lr_0 = 4.7768e-04
Loss = 9.8774e-02, PNorm = 61.1324, GNorm = 1.3659, lr_0 = 4.7735e-04
Loss = 1.1395e-01, PNorm = 61.1412, GNorm = 1.2727, lr_0 = 4.7703e-04
Validation mae = 0.420597
Epoch 11
Loss = 8.1444e-02, PNorm = 61.1565, GNorm = 0.6562, lr_0 = 4.7670e-04
Loss = 8.2867e-02, PNorm = 61.1696, GNorm = 0.4789, lr_0 = 4.7637e-04
Loss = 9.4416e-02, PNorm = 61.1828, GNorm = 0.7432, lr_0 = 4.7605e-04
Loss = 9.7602e-02, PNorm = 61.1924, GNorm = 0.6220, lr_0 = 4.7572e-04
Loss = 8.2029e-02, PNorm = 61.2053, GNorm = 0.8072, lr_0 = 4.7539e-04
Loss = 9.4446e-02, PNorm = 61.2223, GNorm = 0.4616, lr_0 = 4.7507e-04
Loss = 9.7709e-02, PNorm = 61.2393, GNorm = 0.7052, lr_0 = 4.7474e-04
Loss = 8.6932e-02, PNorm = 61.2529, GNorm = 0.9164, lr_0 = 4.7442e-04
Loss = 9.9469e-02, PNorm = 61.2685, GNorm = 0.6972, lr_0 = 4.7409e-04
Loss = 1.0012e-01, PNorm = 61.2801, GNorm = 0.6249, lr_0 = 4.7377e-04
Loss = 8.3827e-02, PNorm = 61.2902, GNorm = 0.7381, lr_0 = 4.7344e-04
Loss = 8.2502e-02, PNorm = 61.3033, GNorm = 0.5708, lr_0 = 4.7312e-04
Loss = 7.4769e-02, PNorm = 61.3165, GNorm = 0.4910, lr_0 = 4.7279e-04
Loss = 9.2652e-02, PNorm = 61.3292, GNorm = 1.6666, lr_0 = 4.7247e-04
Loss = 9.0668e-02, PNorm = 61.3437, GNorm = 0.9767, lr_0 = 4.7215e-04
Loss = 8.8992e-02, PNorm = 61.3589, GNorm = 0.5595, lr_0 = 4.7182e-04
Loss = 8.6375e-02, PNorm = 61.3706, GNorm = 1.2821, lr_0 = 4.7150e-04
Loss = 9.5948e-02, PNorm = 61.3831, GNorm = 0.5970, lr_0 = 4.7118e-04
Loss = 9.9914e-02, PNorm = 61.3959, GNorm = 0.5787, lr_0 = 4.7085e-04
Loss = 8.8961e-02, PNorm = 61.4128, GNorm = 0.9363, lr_0 = 4.7053e-04
Loss = 8.0630e-02, PNorm = 61.4181, GNorm = 0.5580, lr_0 = 4.7021e-04
Loss = 7.8376e-02, PNorm = 61.4231, GNorm = 0.6826, lr_0 = 4.6989e-04
Loss = 7.1894e-02, PNorm = 61.4326, GNorm = 0.6120, lr_0 = 4.6957e-04
Loss = 9.4326e-02, PNorm = 61.4465, GNorm = 0.6644, lr_0 = 4.6924e-04
Loss = 8.5261e-02, PNorm = 61.4595, GNorm = 0.6456, lr_0 = 4.6892e-04
Loss = 6.9739e-02, PNorm = 61.4713, GNorm = 0.4558, lr_0 = 4.6860e-04
Loss = 8.8063e-02, PNorm = 61.4795, GNorm = 1.3915, lr_0 = 4.6828e-04
Loss = 8.5390e-02, PNorm = 61.4880, GNorm = 1.0262, lr_0 = 4.6796e-04
Loss = 8.6898e-02, PNorm = 61.4967, GNorm = 0.5918, lr_0 = 4.6764e-04
Loss = 1.0515e-01, PNorm = 61.5104, GNorm = 1.7310, lr_0 = 4.6732e-04
Loss = 1.0204e-01, PNorm = 61.5253, GNorm = 1.8784, lr_0 = 4.6700e-04
Loss = 9.8756e-02, PNorm = 61.5363, GNorm = 1.2803, lr_0 = 4.6668e-04
Loss = 9.8223e-02, PNorm = 61.5467, GNorm = 0.8190, lr_0 = 4.6636e-04
Loss = 9.8727e-02, PNorm = 61.5642, GNorm = 0.6170, lr_0 = 4.6604e-04
Loss = 8.4030e-02, PNorm = 61.5789, GNorm = 0.7146, lr_0 = 4.6572e-04
Loss = 9.2555e-02, PNorm = 61.5903, GNorm = 1.6337, lr_0 = 4.6540e-04
Loss = 8.2142e-02, PNorm = 61.6001, GNorm = 0.7889, lr_0 = 4.6508e-04
Loss = 7.9457e-02, PNorm = 61.6122, GNorm = 0.5320, lr_0 = 4.6476e-04
Loss = 9.2593e-02, PNorm = 61.6213, GNorm = 0.8053, lr_0 = 4.6445e-04
Loss = 7.1320e-02, PNorm = 61.6281, GNorm = 0.5692, lr_0 = 4.6413e-04
Loss = 7.8663e-02, PNorm = 61.6320, GNorm = 0.9976, lr_0 = 4.6381e-04
Loss = 8.4297e-02, PNorm = 61.6397, GNorm = 0.7187, lr_0 = 4.6349e-04
Loss = 8.3863e-02, PNorm = 61.6508, GNorm = 0.5731, lr_0 = 4.6317e-04
Loss = 9.4437e-02, PNorm = 61.6600, GNorm = 0.6168, lr_0 = 4.6286e-04
Loss = 9.2518e-02, PNorm = 61.6697, GNorm = 0.9637, lr_0 = 4.6254e-04
Loss = 8.2885e-02, PNorm = 61.6827, GNorm = 0.4862, lr_0 = 4.6222e-04
Loss = 1.0015e-01, PNorm = 61.6946, GNorm = 0.7444, lr_0 = 4.6191e-04
Loss = 7.7314e-02, PNorm = 61.7035, GNorm = 0.5372, lr_0 = 4.6159e-04
Loss = 8.4401e-02, PNorm = 61.7137, GNorm = 0.6682, lr_0 = 4.6127e-04
Loss = 9.1862e-02, PNorm = 61.7293, GNorm = 0.9261, lr_0 = 4.6096e-04
Loss = 8.3246e-02, PNorm = 61.7421, GNorm = 0.4813, lr_0 = 4.6064e-04
Loss = 9.8291e-02, PNorm = 61.7561, GNorm = 0.7279, lr_0 = 4.6033e-04
Loss = 8.8262e-02, PNorm = 61.7694, GNorm = 0.7632, lr_0 = 4.6001e-04
Loss = 9.1755e-02, PNorm = 61.7795, GNorm = 0.5272, lr_0 = 4.5970e-04
Loss = 8.6596e-02, PNorm = 61.7903, GNorm = 1.2541, lr_0 = 4.5938e-04
Loss = 8.6719e-02, PNorm = 61.7999, GNorm = 0.5320, lr_0 = 4.5907e-04
Loss = 7.2016e-02, PNorm = 61.8096, GNorm = 0.5000, lr_0 = 4.5875e-04
Loss = 8.8157e-02, PNorm = 61.8171, GNorm = 0.7302, lr_0 = 4.5844e-04
Loss = 7.9427e-02, PNorm = 61.8255, GNorm = 0.6174, lr_0 = 4.5812e-04
Loss = 9.0909e-02, PNorm = 61.8334, GNorm = 0.7625, lr_0 = 4.5781e-04
Loss = 8.5365e-02, PNorm = 61.8407, GNorm = 0.4983, lr_0 = 4.5750e-04
Loss = 8.3600e-02, PNorm = 61.8494, GNorm = 0.4679, lr_0 = 4.5718e-04
Loss = 9.7111e-02, PNorm = 61.8576, GNorm = 0.7205, lr_0 = 4.5687e-04
Loss = 7.9673e-02, PNorm = 61.8674, GNorm = 0.4460, lr_0 = 4.5656e-04
Loss = 7.8765e-02, PNorm = 61.8763, GNorm = 0.4935, lr_0 = 4.5624e-04
Loss = 9.8391e-02, PNorm = 61.8863, GNorm = 0.8655, lr_0 = 4.5593e-04
Loss = 8.5772e-02, PNorm = 61.9017, GNorm = 0.8891, lr_0 = 4.5562e-04
Loss = 9.4252e-02, PNorm = 61.9148, GNorm = 0.6645, lr_0 = 4.5531e-04
Loss = 8.6450e-02, PNorm = 61.9240, GNorm = 1.0070, lr_0 = 4.5499e-04
Loss = 7.9799e-02, PNorm = 61.9341, GNorm = 0.8386, lr_0 = 4.5468e-04
Loss = 8.1769e-02, PNorm = 61.9428, GNorm = 0.4786, lr_0 = 4.5437e-04
Loss = 7.6458e-02, PNorm = 61.9484, GNorm = 0.6778, lr_0 = 4.5406e-04
Loss = 8.4115e-02, PNorm = 61.9598, GNorm = 0.6517, lr_0 = 4.5375e-04
Loss = 9.7961e-02, PNorm = 61.9702, GNorm = 0.8306, lr_0 = 4.5344e-04
Loss = 9.5786e-02, PNorm = 61.9807, GNorm = 1.0008, lr_0 = 4.5313e-04
Loss = 9.5092e-02, PNorm = 61.9937, GNorm = 0.9457, lr_0 = 4.5282e-04
Loss = 8.4971e-02, PNorm = 62.0081, GNorm = 0.6076, lr_0 = 4.5251e-04
Loss = 7.4721e-02, PNorm = 62.0198, GNorm = 0.4671, lr_0 = 4.5220e-04
Loss = 8.1210e-02, PNorm = 62.0298, GNorm = 0.7305, lr_0 = 4.5189e-04
Loss = 7.8049e-02, PNorm = 62.0392, GNorm = 0.6223, lr_0 = 4.5158e-04
Loss = 8.2031e-02, PNorm = 62.0491, GNorm = 0.7618, lr_0 = 4.5127e-04
Loss = 7.4686e-02, PNorm = 62.0599, GNorm = 0.3818, lr_0 = 4.5096e-04
Loss = 8.3879e-02, PNorm = 62.0667, GNorm = 0.7265, lr_0 = 4.5065e-04
Loss = 1.0111e-01, PNorm = 62.0756, GNorm = 1.3899, lr_0 = 4.5034e-04
Loss = 8.0869e-02, PNorm = 62.0827, GNorm = 0.7096, lr_0 = 4.5003e-04
Loss = 7.4456e-02, PNorm = 62.0952, GNorm = 0.5798, lr_0 = 4.4972e-04
Loss = 8.6919e-02, PNorm = 62.1037, GNorm = 0.7550, lr_0 = 4.4942e-04
Loss = 8.7525e-02, PNorm = 62.1086, GNorm = 1.0015, lr_0 = 4.4911e-04
Loss = 8.4605e-02, PNorm = 62.1147, GNorm = 0.5503, lr_0 = 4.4880e-04
Loss = 8.8194e-02, PNorm = 62.1203, GNorm = 0.8282, lr_0 = 4.4849e-04
Loss = 9.3047e-02, PNorm = 62.1280, GNorm = 0.6145, lr_0 = 4.4819e-04
Loss = 9.3188e-02, PNorm = 62.1428, GNorm = 0.5098, lr_0 = 4.4788e-04
Loss = 8.0458e-02, PNorm = 62.1514, GNorm = 0.5701, lr_0 = 4.4757e-04
Loss = 7.7769e-02, PNorm = 62.1582, GNorm = 0.7860, lr_0 = 4.4727e-04
Loss = 8.1927e-02, PNorm = 62.1682, GNorm = 0.7879, lr_0 = 4.4696e-04
Loss = 7.9031e-02, PNorm = 62.1778, GNorm = 0.6636, lr_0 = 4.4665e-04
Loss = 9.2084e-02, PNorm = 62.1846, GNorm = 0.5018, lr_0 = 4.4635e-04
Loss = 7.7270e-02, PNorm = 62.1913, GNorm = 1.0198, lr_0 = 4.4604e-04
Loss = 7.9458e-02, PNorm = 62.2005, GNorm = 0.9049, lr_0 = 4.4574e-04
Loss = 8.6196e-02, PNorm = 62.2111, GNorm = 1.4172, lr_0 = 4.4543e-04
Loss = 8.4547e-02, PNorm = 62.2212, GNorm = 1.2041, lr_0 = 4.4513e-04
Loss = 9.2542e-02, PNorm = 62.2318, GNorm = 0.9724, lr_0 = 4.4482e-04
Loss = 8.7186e-02, PNorm = 62.2449, GNorm = 0.5917, lr_0 = 4.4452e-04
Loss = 9.0383e-02, PNorm = 62.2583, GNorm = 0.5376, lr_0 = 4.4421e-04
Loss = 8.2289e-02, PNorm = 62.2690, GNorm = 0.6150, lr_0 = 4.4391e-04
Loss = 8.9507e-02, PNorm = 62.2782, GNorm = 0.5780, lr_0 = 4.4360e-04
Loss = 8.3933e-02, PNorm = 62.2885, GNorm = 0.7778, lr_0 = 4.4330e-04
Loss = 8.0852e-02, PNorm = 62.2978, GNorm = 0.6686, lr_0 = 4.4299e-04
Loss = 8.5232e-02, PNorm = 62.3044, GNorm = 0.6831, lr_0 = 4.4269e-04
Loss = 9.0513e-02, PNorm = 62.3122, GNorm = 0.6561, lr_0 = 4.4239e-04
Loss = 8.1345e-02, PNorm = 62.3200, GNorm = 1.0485, lr_0 = 4.4209e-04
Loss = 8.2414e-02, PNorm = 62.3297, GNorm = 0.5338, lr_0 = 4.4178e-04
Loss = 8.5789e-02, PNorm = 62.3408, GNorm = 0.5398, lr_0 = 4.4148e-04
Loss = 8.9848e-02, PNorm = 62.3516, GNorm = 0.9353, lr_0 = 4.4118e-04
Loss = 8.9732e-02, PNorm = 62.3564, GNorm = 0.6046, lr_0 = 4.4088e-04
Loss = 8.2943e-02, PNorm = 62.3643, GNorm = 0.6829, lr_0 = 4.4057e-04
Loss = 8.3072e-02, PNorm = 62.3705, GNorm = 0.8000, lr_0 = 4.4027e-04
Loss = 8.8302e-02, PNorm = 62.3814, GNorm = 0.7905, lr_0 = 4.3997e-04
Loss = 8.4804e-02, PNorm = 62.3884, GNorm = 0.4981, lr_0 = 4.3967e-04
Loss = 8.4951e-02, PNorm = 62.3935, GNorm = 0.5378, lr_0 = 4.3937e-04
Validation mae = 0.400941
Epoch 12
Loss = 7.2779e-02, PNorm = 62.4065, GNorm = 0.5652, lr_0 = 4.3907e-04
Loss = 8.7845e-02, PNorm = 62.4209, GNorm = 0.8637, lr_0 = 4.3877e-04
Loss = 8.3447e-02, PNorm = 62.4340, GNorm = 0.8034, lr_0 = 4.3846e-04
Loss = 6.7656e-02, PNorm = 62.4419, GNorm = 0.3952, lr_0 = 4.3816e-04
Loss = 7.0395e-02, PNorm = 62.4526, GNorm = 0.8736, lr_0 = 4.3786e-04
Loss = 6.7732e-02, PNorm = 62.4612, GNorm = 0.8063, lr_0 = 4.3756e-04
Loss = 8.4144e-02, PNorm = 62.4703, GNorm = 0.5678, lr_0 = 4.3726e-04
Loss = 8.0043e-02, PNorm = 62.4816, GNorm = 0.9080, lr_0 = 4.3696e-04
Loss = 6.5361e-02, PNorm = 62.4943, GNorm = 0.4273, lr_0 = 4.3667e-04
Loss = 8.5096e-02, PNorm = 62.5086, GNorm = 0.5665, lr_0 = 4.3637e-04
Loss = 7.7330e-02, PNorm = 62.5220, GNorm = 0.7156, lr_0 = 4.3607e-04
Loss = 7.6841e-02, PNorm = 62.5321, GNorm = 0.6318, lr_0 = 4.3577e-04
Loss = 7.4083e-02, PNorm = 62.5429, GNorm = 0.6434, lr_0 = 4.3547e-04
Loss = 7.5072e-02, PNorm = 62.5535, GNorm = 0.4874, lr_0 = 4.3517e-04
Loss = 9.3917e-02, PNorm = 62.5646, GNorm = 1.0772, lr_0 = 4.3487e-04
Loss = 8.9271e-02, PNorm = 62.5789, GNorm = 0.4129, lr_0 = 4.3458e-04
Loss = 7.3954e-02, PNorm = 62.5899, GNorm = 0.5033, lr_0 = 4.3428e-04
Loss = 8.8614e-02, PNorm = 62.6004, GNorm = 0.4988, lr_0 = 4.3398e-04
Loss = 6.3605e-02, PNorm = 62.6118, GNorm = 0.5029, lr_0 = 4.3368e-04
Loss = 7.8845e-02, PNorm = 62.6230, GNorm = 0.5076, lr_0 = 4.3339e-04
Loss = 8.1739e-02, PNorm = 62.6319, GNorm = 0.7055, lr_0 = 4.3309e-04
Loss = 7.6960e-02, PNorm = 62.6403, GNorm = 0.5584, lr_0 = 4.3279e-04
Loss = 7.4638e-02, PNorm = 62.6501, GNorm = 0.6746, lr_0 = 4.3250e-04
Loss = 8.0784e-02, PNorm = 62.6609, GNorm = 0.5526, lr_0 = 4.3220e-04
Loss = 7.8280e-02, PNorm = 62.6708, GNorm = 0.7003, lr_0 = 4.3190e-04
Loss = 6.8278e-02, PNorm = 62.6811, GNorm = 0.6038, lr_0 = 4.3161e-04
Loss = 7.1597e-02, PNorm = 62.6915, GNorm = 0.5545, lr_0 = 4.3131e-04
Loss = 7.9235e-02, PNorm = 62.7001, GNorm = 0.7070, lr_0 = 4.3102e-04
Loss = 8.1658e-02, PNorm = 62.7093, GNorm = 0.5917, lr_0 = 4.3072e-04
Loss = 7.8135e-02, PNorm = 62.7206, GNorm = 0.8177, lr_0 = 4.3043e-04
Loss = 7.2401e-02, PNorm = 62.7300, GNorm = 0.5701, lr_0 = 4.3013e-04
Loss = 8.2136e-02, PNorm = 62.7376, GNorm = 0.7763, lr_0 = 4.2984e-04
Loss = 8.0089e-02, PNorm = 62.7462, GNorm = 0.4124, lr_0 = 4.2954e-04
Loss = 7.1984e-02, PNorm = 62.7559, GNorm = 0.6926, lr_0 = 4.2925e-04
Loss = 7.6791e-02, PNorm = 62.7647, GNorm = 0.5993, lr_0 = 4.2895e-04
Loss = 6.4124e-02, PNorm = 62.7760, GNorm = 0.8299, lr_0 = 4.2866e-04
Loss = 9.1298e-02, PNorm = 62.7850, GNorm = 1.7685, lr_0 = 4.2837e-04
Loss = 7.8942e-02, PNorm = 62.7963, GNorm = 0.5821, lr_0 = 4.2807e-04
Loss = 8.6838e-02, PNorm = 62.8051, GNorm = 0.8189, lr_0 = 4.2778e-04
Loss = 8.7588e-02, PNorm = 62.8154, GNorm = 0.5822, lr_0 = 4.2749e-04
Loss = 8.2481e-02, PNorm = 62.8241, GNorm = 0.6733, lr_0 = 4.2719e-04
Loss = 6.4542e-02, PNorm = 62.8336, GNorm = 0.4666, lr_0 = 4.2690e-04
Loss = 8.6603e-02, PNorm = 62.8474, GNorm = 0.4477, lr_0 = 4.2661e-04
Loss = 9.3343e-02, PNorm = 62.8597, GNorm = 1.1697, lr_0 = 4.2632e-04
Loss = 8.9788e-02, PNorm = 62.8688, GNorm = 0.9879, lr_0 = 4.2602e-04
Loss = 8.6119e-02, PNorm = 62.8809, GNorm = 0.8912, lr_0 = 4.2573e-04
Loss = 8.5599e-02, PNorm = 62.8906, GNorm = 0.7000, lr_0 = 4.2544e-04
Loss = 7.0242e-02, PNorm = 62.8977, GNorm = 0.8315, lr_0 = 4.2515e-04
Loss = 8.9323e-02, PNorm = 62.9044, GNorm = 0.7052, lr_0 = 4.2486e-04
Loss = 8.5459e-02, PNorm = 62.9140, GNorm = 1.0423, lr_0 = 4.2457e-04
Loss = 9.0293e-02, PNorm = 62.9248, GNorm = 0.4666, lr_0 = 4.2428e-04
Loss = 7.4966e-02, PNorm = 62.9345, GNorm = 0.4624, lr_0 = 4.2399e-04
Loss = 9.2878e-02, PNorm = 62.9459, GNorm = 0.6699, lr_0 = 4.2370e-04
Loss = 8.0312e-02, PNorm = 62.9597, GNorm = 1.2193, lr_0 = 4.2340e-04
Loss = 8.3818e-02, PNorm = 62.9681, GNorm = 0.4709, lr_0 = 4.2311e-04
Loss = 8.8049e-02, PNorm = 62.9757, GNorm = 0.7604, lr_0 = 4.2283e-04
Loss = 7.8609e-02, PNorm = 62.9855, GNorm = 0.6856, lr_0 = 4.2254e-04
Loss = 8.1375e-02, PNorm = 62.9935, GNorm = 0.5245, lr_0 = 4.2225e-04
Loss = 8.7515e-02, PNorm = 63.0059, GNorm = 0.9766, lr_0 = 4.2196e-04
Loss = 8.9980e-02, PNorm = 63.0155, GNorm = 0.8256, lr_0 = 4.2167e-04
Loss = 8.5936e-02, PNorm = 63.0239, GNorm = 0.5761, lr_0 = 4.2138e-04
Loss = 8.1907e-02, PNorm = 63.0336, GNorm = 0.5947, lr_0 = 4.2109e-04
Loss = 7.8154e-02, PNorm = 63.0411, GNorm = 0.4479, lr_0 = 4.2080e-04
Loss = 6.6616e-02, PNorm = 63.0484, GNorm = 0.4826, lr_0 = 4.2051e-04
Loss = 8.5487e-02, PNorm = 63.0596, GNorm = 1.0414, lr_0 = 4.2023e-04
Loss = 8.3853e-02, PNorm = 63.0675, GNorm = 0.5186, lr_0 = 4.1994e-04
Loss = 7.0670e-02, PNorm = 63.0758, GNorm = 0.8326, lr_0 = 4.1965e-04
Loss = 8.8316e-02, PNorm = 63.0834, GNorm = 0.6478, lr_0 = 4.1936e-04
Loss = 8.3590e-02, PNorm = 63.0937, GNorm = 0.7159, lr_0 = 4.1907e-04
Loss = 8.6693e-02, PNorm = 63.1042, GNorm = 0.6396, lr_0 = 4.1879e-04
Loss = 7.1442e-02, PNorm = 63.1155, GNorm = 0.4118, lr_0 = 4.1850e-04
Loss = 7.6765e-02, PNorm = 63.1241, GNorm = 0.6419, lr_0 = 4.1821e-04
Loss = 9.0887e-02, PNorm = 63.1336, GNorm = 0.8867, lr_0 = 4.1793e-04
Loss = 9.5443e-02, PNorm = 63.1451, GNorm = 0.8599, lr_0 = 4.1764e-04
Loss = 7.7427e-02, PNorm = 63.1546, GNorm = 0.8390, lr_0 = 4.1736e-04
Loss = 8.4503e-02, PNorm = 63.1638, GNorm = 0.4345, lr_0 = 4.1707e-04
Loss = 8.2774e-02, PNorm = 63.1706, GNorm = 0.6567, lr_0 = 4.1678e-04
Loss = 8.6192e-02, PNorm = 63.1803, GNorm = 0.6218, lr_0 = 4.1650e-04
Loss = 8.7786e-02, PNorm = 63.1887, GNorm = 0.5570, lr_0 = 4.1621e-04
Loss = 7.6400e-02, PNorm = 63.1954, GNorm = 0.5703, lr_0 = 4.1593e-04
Loss = 7.1473e-02, PNorm = 63.2006, GNorm = 0.7277, lr_0 = 4.1564e-04
Loss = 6.7476e-02, PNorm = 63.2040, GNorm = 0.5419, lr_0 = 4.1536e-04
Loss = 9.1840e-02, PNorm = 63.2105, GNorm = 0.7286, lr_0 = 4.1507e-04
Loss = 8.3031e-02, PNorm = 63.2206, GNorm = 0.5538, lr_0 = 4.1479e-04
Loss = 7.8259e-02, PNorm = 63.2276, GNorm = 0.6931, lr_0 = 4.1450e-04
Loss = 7.5462e-02, PNorm = 63.2352, GNorm = 0.6949, lr_0 = 4.1422e-04
Loss = 8.8459e-02, PNorm = 63.2460, GNorm = 0.6648, lr_0 = 4.1394e-04
Loss = 7.1470e-02, PNorm = 63.2545, GNorm = 0.9191, lr_0 = 4.1365e-04
Loss = 8.3322e-02, PNorm = 63.2613, GNorm = 0.8108, lr_0 = 4.1337e-04
Loss = 7.9865e-02, PNorm = 63.2690, GNorm = 0.5105, lr_0 = 4.1309e-04
Loss = 7.5400e-02, PNorm = 63.2748, GNorm = 0.5772, lr_0 = 4.1280e-04
Loss = 7.9439e-02, PNorm = 63.2852, GNorm = 0.7841, lr_0 = 4.1252e-04
Loss = 8.2362e-02, PNorm = 63.2931, GNorm = 0.5498, lr_0 = 4.1224e-04
Loss = 8.8442e-02, PNorm = 63.3046, GNorm = 0.7579, lr_0 = 4.1196e-04
Loss = 7.4246e-02, PNorm = 63.3148, GNorm = 0.4708, lr_0 = 4.1167e-04
Loss = 9.0037e-02, PNorm = 63.3249, GNorm = 0.6329, lr_0 = 4.1139e-04
Loss = 7.9067e-02, PNorm = 63.3327, GNorm = 0.7688, lr_0 = 4.1111e-04
Loss = 8.9716e-02, PNorm = 63.3440, GNorm = 0.8823, lr_0 = 4.1083e-04
Loss = 8.5800e-02, PNorm = 63.3532, GNorm = 0.5462, lr_0 = 4.1055e-04
Loss = 7.7120e-02, PNorm = 63.3583, GNorm = 0.5217, lr_0 = 4.1027e-04
Loss = 8.2388e-02, PNorm = 63.3632, GNorm = 0.5792, lr_0 = 4.0998e-04
Loss = 7.2225e-02, PNorm = 63.3691, GNorm = 0.5242, lr_0 = 4.0970e-04
Loss = 9.7676e-02, PNorm = 63.3804, GNorm = 0.6070, lr_0 = 4.0942e-04
Loss = 8.5048e-02, PNorm = 63.3917, GNorm = 1.3623, lr_0 = 4.0914e-04
Loss = 9.0966e-02, PNorm = 63.4010, GNorm = 0.8291, lr_0 = 4.0886e-04
Loss = 8.7548e-02, PNorm = 63.4133, GNorm = 0.6530, lr_0 = 4.0858e-04
Loss = 8.3158e-02, PNorm = 63.4240, GNorm = 0.4750, lr_0 = 4.0830e-04
Loss = 1.0212e-01, PNorm = 63.4327, GNorm = 0.5172, lr_0 = 4.0802e-04
Loss = 8.5475e-02, PNorm = 63.4444, GNorm = 0.9361, lr_0 = 4.0774e-04
Loss = 7.5221e-02, PNorm = 63.4530, GNorm = 0.5722, lr_0 = 4.0746e-04
Loss = 8.2850e-02, PNorm = 63.4614, GNorm = 0.6612, lr_0 = 4.0718e-04
Loss = 8.8277e-02, PNorm = 63.4701, GNorm = 0.9413, lr_0 = 4.0691e-04
Loss = 7.2999e-02, PNorm = 63.4789, GNorm = 0.8962, lr_0 = 4.0663e-04
Loss = 8.5836e-02, PNorm = 63.4869, GNorm = 0.5252, lr_0 = 4.0635e-04
Loss = 7.6561e-02, PNorm = 63.4926, GNorm = 0.7493, lr_0 = 4.0607e-04
Loss = 9.1318e-02, PNorm = 63.5014, GNorm = 0.6491, lr_0 = 4.0579e-04
Loss = 7.9476e-02, PNorm = 63.5068, GNorm = 0.5363, lr_0 = 4.0551e-04
Loss = 7.4500e-02, PNorm = 63.5119, GNorm = 0.9683, lr_0 = 4.0524e-04
Loss = 9.0926e-02, PNorm = 63.5199, GNorm = 0.5115, lr_0 = 4.0496e-04
Loss = 8.8376e-02, PNorm = 63.5284, GNorm = 0.6730, lr_0 = 4.0468e-04
Validation mae = 0.396762
Epoch 13
Loss = 7.4727e-02, PNorm = 63.5360, GNorm = 0.9930, lr_0 = 4.0440e-04
Loss = 7.5161e-02, PNorm = 63.5465, GNorm = 1.0185, lr_0 = 4.0413e-04
Loss = 7.3750e-02, PNorm = 63.5598, GNorm = 0.5063, lr_0 = 4.0385e-04
Loss = 7.1008e-02, PNorm = 63.5696, GNorm = 0.5184, lr_0 = 4.0357e-04
Loss = 7.4210e-02, PNorm = 63.5794, GNorm = 0.7971, lr_0 = 4.0330e-04
Loss = 6.8929e-02, PNorm = 63.5906, GNorm = 0.9480, lr_0 = 4.0302e-04
Loss = 6.7615e-02, PNorm = 63.5998, GNorm = 0.5877, lr_0 = 4.0274e-04
Loss = 7.1639e-02, PNorm = 63.6089, GNorm = 0.5698, lr_0 = 4.0247e-04
Loss = 7.2044e-02, PNorm = 63.6169, GNorm = 0.5784, lr_0 = 4.0219e-04
Loss = 6.9023e-02, PNorm = 63.6241, GNorm = 0.6292, lr_0 = 4.0192e-04
Loss = 7.3163e-02, PNorm = 63.6302, GNorm = 0.6063, lr_0 = 4.0164e-04
Loss = 7.3933e-02, PNorm = 63.6423, GNorm = 0.4926, lr_0 = 4.0137e-04
Loss = 7.8150e-02, PNorm = 63.6569, GNorm = 0.4542, lr_0 = 4.0109e-04
Loss = 7.0136e-02, PNorm = 63.6663, GNorm = 0.6984, lr_0 = 4.0082e-04
Loss = 7.2963e-02, PNorm = 63.6750, GNorm = 0.6593, lr_0 = 4.0054e-04
Loss = 6.5794e-02, PNorm = 63.6835, GNorm = 0.6917, lr_0 = 4.0027e-04
Loss = 6.5029e-02, PNorm = 63.6923, GNorm = 0.6218, lr_0 = 3.9999e-04
Loss = 9.1527e-02, PNorm = 63.7013, GNorm = 0.7070, lr_0 = 3.9972e-04
Loss = 6.9752e-02, PNorm = 63.7111, GNorm = 0.5368, lr_0 = 3.9945e-04
Loss = 7.9632e-02, PNorm = 63.7175, GNorm = 0.9925, lr_0 = 3.9917e-04
Loss = 9.4264e-02, PNorm = 63.7253, GNorm = 1.3625, lr_0 = 3.9890e-04
Loss = 7.2227e-02, PNorm = 63.7414, GNorm = 0.7913, lr_0 = 3.9863e-04
Loss = 7.6801e-02, PNorm = 63.7575, GNorm = 0.6712, lr_0 = 3.9835e-04
Loss = 6.1499e-02, PNorm = 63.7656, GNorm = 0.5725, lr_0 = 3.9808e-04
Loss = 6.0195e-02, PNorm = 63.7698, GNorm = 0.3692, lr_0 = 3.9781e-04
Loss = 6.9520e-02, PNorm = 63.7762, GNorm = 0.4561, lr_0 = 3.9753e-04
Loss = 7.4685e-02, PNorm = 63.7851, GNorm = 0.4715, lr_0 = 3.9726e-04
Loss = 8.6148e-02, PNorm = 63.7975, GNorm = 0.7274, lr_0 = 3.9699e-04
Loss = 7.3098e-02, PNorm = 63.8092, GNorm = 0.9901, lr_0 = 3.9672e-04
Loss = 7.6262e-02, PNorm = 63.8161, GNorm = 0.6313, lr_0 = 3.9645e-04
Loss = 6.8517e-02, PNorm = 63.8255, GNorm = 0.6519, lr_0 = 3.9617e-04
Loss = 7.8043e-02, PNorm = 63.8339, GNorm = 1.0054, lr_0 = 3.9590e-04
Loss = 7.7409e-02, PNorm = 63.8437, GNorm = 0.5826, lr_0 = 3.9563e-04
Loss = 7.9408e-02, PNorm = 63.8506, GNorm = 0.6897, lr_0 = 3.9536e-04
Loss = 8.5576e-02, PNorm = 63.8603, GNorm = 0.6686, lr_0 = 3.9509e-04
Loss = 6.3192e-02, PNorm = 63.8688, GNorm = 0.5437, lr_0 = 3.9482e-04
Loss = 5.9768e-02, PNorm = 63.8737, GNorm = 0.6335, lr_0 = 3.9455e-04
Loss = 7.4166e-02, PNorm = 63.8798, GNorm = 0.6530, lr_0 = 3.9428e-04
Loss = 6.6925e-02, PNorm = 63.8859, GNorm = 0.8338, lr_0 = 3.9401e-04
Loss = 7.5893e-02, PNorm = 63.8946, GNorm = 0.6225, lr_0 = 3.9374e-04
Loss = 7.1351e-02, PNorm = 63.9022, GNorm = 0.9417, lr_0 = 3.9347e-04
Loss = 7.6320e-02, PNorm = 63.9121, GNorm = 0.9968, lr_0 = 3.9320e-04
Loss = 7.2530e-02, PNorm = 63.9192, GNorm = 0.6389, lr_0 = 3.9293e-04
Loss = 7.4812e-02, PNorm = 63.9294, GNorm = 0.6431, lr_0 = 3.9266e-04
Loss = 7.1198e-02, PNorm = 63.9404, GNorm = 1.0025, lr_0 = 3.9239e-04
Loss = 7.6182e-02, PNorm = 63.9517, GNorm = 0.8090, lr_0 = 3.9212e-04
Loss = 8.4395e-02, PNorm = 63.9593, GNorm = 0.6866, lr_0 = 3.9185e-04
Loss = 8.3882e-02, PNorm = 63.9676, GNorm = 0.6735, lr_0 = 3.9159e-04
Loss = 7.8386e-02, PNorm = 63.9770, GNorm = 0.9908, lr_0 = 3.9132e-04
Loss = 8.2251e-02, PNorm = 63.9847, GNorm = 1.0187, lr_0 = 3.9105e-04
Loss = 7.4797e-02, PNorm = 63.9928, GNorm = 0.6453, lr_0 = 3.9078e-04
Loss = 7.4503e-02, PNorm = 64.0013, GNorm = 0.9635, lr_0 = 3.9051e-04
Loss = 8.0215e-02, PNorm = 64.0079, GNorm = 0.5175, lr_0 = 3.9025e-04
Loss = 7.1833e-02, PNorm = 64.0142, GNorm = 0.7226, lr_0 = 3.8998e-04
Loss = 6.9310e-02, PNorm = 64.0198, GNorm = 0.6757, lr_0 = 3.8971e-04
Loss = 7.2975e-02, PNorm = 64.0290, GNorm = 0.6305, lr_0 = 3.8945e-04
Loss = 7.0771e-02, PNorm = 64.0360, GNorm = 0.9119, lr_0 = 3.8918e-04
Loss = 8.1050e-02, PNorm = 64.0417, GNorm = 0.6386, lr_0 = 3.8891e-04
Loss = 7.6153e-02, PNorm = 64.0507, GNorm = 0.6265, lr_0 = 3.8865e-04
Loss = 7.2628e-02, PNorm = 64.0644, GNorm = 0.5427, lr_0 = 3.8838e-04
Loss = 7.2170e-02, PNorm = 64.0719, GNorm = 0.4807, lr_0 = 3.8811e-04
Loss = 8.1002e-02, PNorm = 64.0791, GNorm = 0.8259, lr_0 = 3.8785e-04
Loss = 7.1315e-02, PNorm = 64.0869, GNorm = 0.6752, lr_0 = 3.8758e-04
Loss = 7.4979e-02, PNorm = 64.0946, GNorm = 0.9378, lr_0 = 3.8732e-04
Loss = 8.8045e-02, PNorm = 64.1044, GNorm = 0.5933, lr_0 = 3.8705e-04
Loss = 8.9728e-02, PNorm = 64.1130, GNorm = 0.6173, lr_0 = 3.8679e-04
Loss = 7.8856e-02, PNorm = 64.1200, GNorm = 0.7197, lr_0 = 3.8652e-04
Loss = 7.2802e-02, PNorm = 64.1266, GNorm = 0.5622, lr_0 = 3.8626e-04
Loss = 8.0070e-02, PNorm = 64.1346, GNorm = 0.6723, lr_0 = 3.8599e-04
Loss = 7.1640e-02, PNorm = 64.1432, GNorm = 0.8771, lr_0 = 3.8573e-04
Loss = 8.7876e-02, PNorm = 64.1528, GNorm = 0.9082, lr_0 = 3.8546e-04
Loss = 7.1821e-02, PNorm = 64.1609, GNorm = 0.6909, lr_0 = 3.8520e-04
Loss = 8.8481e-02, PNorm = 64.1715, GNorm = 0.5945, lr_0 = 3.8493e-04
Loss = 8.4135e-02, PNorm = 64.1822, GNorm = 1.0561, lr_0 = 3.8467e-04
Loss = 8.3860e-02, PNorm = 64.1951, GNorm = 1.2053, lr_0 = 3.8441e-04
Loss = 9.2033e-02, PNorm = 64.2075, GNorm = 0.4161, lr_0 = 3.8414e-04
Loss = 8.0751e-02, PNorm = 64.2204, GNorm = 0.5860, lr_0 = 3.8388e-04
Loss = 7.0566e-02, PNorm = 64.2338, GNorm = 0.5976, lr_0 = 3.8362e-04
Loss = 7.8414e-02, PNorm = 64.2426, GNorm = 1.2141, lr_0 = 3.8336e-04
Loss = 7.4923e-02, PNorm = 64.2466, GNorm = 0.8848, lr_0 = 3.8309e-04
Loss = 7.0025e-02, PNorm = 64.2577, GNorm = 0.7615, lr_0 = 3.8283e-04
Loss = 8.8355e-02, PNorm = 64.2659, GNorm = 0.6529, lr_0 = 3.8257e-04
Loss = 9.3844e-02, PNorm = 64.2747, GNorm = 0.4571, lr_0 = 3.8231e-04
Loss = 7.6399e-02, PNorm = 64.2822, GNorm = 0.4735, lr_0 = 3.8204e-04
Loss = 7.9703e-02, PNorm = 64.2900, GNorm = 0.8637, lr_0 = 3.8178e-04
Loss = 7.3186e-02, PNorm = 64.2997, GNorm = 0.4597, lr_0 = 3.8152e-04
Loss = 7.7150e-02, PNorm = 64.3080, GNorm = 0.8388, lr_0 = 3.8126e-04
Loss = 6.8927e-02, PNorm = 64.3170, GNorm = 0.6278, lr_0 = 3.8100e-04
Loss = 7.1537e-02, PNorm = 64.3258, GNorm = 0.5302, lr_0 = 3.8074e-04
Loss = 8.9472e-02, PNorm = 64.3322, GNorm = 0.8002, lr_0 = 3.8048e-04
Loss = 7.6754e-02, PNorm = 64.3380, GNorm = 0.4955, lr_0 = 3.8022e-04
Loss = 8.2248e-02, PNorm = 64.3436, GNorm = 0.4361, lr_0 = 3.7995e-04
Loss = 8.4058e-02, PNorm = 64.3546, GNorm = 0.7890, lr_0 = 3.7969e-04
Loss = 8.4705e-02, PNorm = 64.3666, GNorm = 0.5426, lr_0 = 3.7943e-04
Loss = 7.9163e-02, PNorm = 64.3722, GNorm = 0.5234, lr_0 = 3.7917e-04
Loss = 8.5517e-02, PNorm = 64.3783, GNorm = 0.8608, lr_0 = 3.7891e-04
Loss = 8.1170e-02, PNorm = 64.3860, GNorm = 0.7426, lr_0 = 3.7866e-04
Loss = 8.1844e-02, PNorm = 64.3963, GNorm = 0.5478, lr_0 = 3.7840e-04
Loss = 7.9900e-02, PNorm = 64.4088, GNorm = 0.5750, lr_0 = 3.7814e-04
Loss = 6.9878e-02, PNorm = 64.4220, GNorm = 0.6882, lr_0 = 3.7788e-04
Loss = 7.7154e-02, PNorm = 64.4280, GNorm = 0.5891, lr_0 = 3.7762e-04
Loss = 7.2499e-02, PNorm = 64.4331, GNorm = 0.4579, lr_0 = 3.7736e-04
Loss = 7.3330e-02, PNorm = 64.4361, GNorm = 0.5690, lr_0 = 3.7710e-04
Loss = 8.3696e-02, PNorm = 64.4399, GNorm = 0.5581, lr_0 = 3.7684e-04
Loss = 6.9305e-02, PNorm = 64.4456, GNorm = 0.4165, lr_0 = 3.7659e-04
Loss = 7.7491e-02, PNorm = 64.4539, GNorm = 0.7786, lr_0 = 3.7633e-04
Loss = 6.4942e-02, PNorm = 64.4636, GNorm = 0.5416, lr_0 = 3.7607e-04
Loss = 7.2780e-02, PNorm = 64.4713, GNorm = 0.8814, lr_0 = 3.7581e-04
Loss = 7.1726e-02, PNorm = 64.4786, GNorm = 0.9217, lr_0 = 3.7555e-04
Loss = 6.9853e-02, PNorm = 64.4850, GNorm = 0.7833, lr_0 = 3.7530e-04
Loss = 7.4836e-02, PNorm = 64.4918, GNorm = 1.1558, lr_0 = 3.7504e-04
Loss = 7.9879e-02, PNorm = 64.5009, GNorm = 0.6688, lr_0 = 3.7478e-04
Loss = 8.1426e-02, PNorm = 64.5108, GNorm = 0.4871, lr_0 = 3.7453e-04
Loss = 7.0699e-02, PNorm = 64.5190, GNorm = 0.4592, lr_0 = 3.7427e-04
Loss = 7.1811e-02, PNorm = 64.5275, GNorm = 0.5584, lr_0 = 3.7401e-04
Loss = 7.0425e-02, PNorm = 64.5346, GNorm = 0.8663, lr_0 = 3.7376e-04
Loss = 6.3991e-02, PNorm = 64.5401, GNorm = 0.5350, lr_0 = 3.7350e-04
Loss = 6.9966e-02, PNorm = 64.5455, GNorm = 0.6320, lr_0 = 3.7325e-04
Loss = 7.9785e-02, PNorm = 64.5520, GNorm = 0.5813, lr_0 = 3.7299e-04
Loss = 7.9020e-02, PNorm = 64.5625, GNorm = 0.4115, lr_0 = 3.7273e-04
Validation mae = 0.397995
Epoch 14
Loss = 5.9563e-02, PNorm = 64.5719, GNorm = 0.4571, lr_0 = 3.7248e-04
Loss = 6.2944e-02, PNorm = 64.5817, GNorm = 0.4156, lr_0 = 3.7222e-04
Loss = 6.6538e-02, PNorm = 64.5854, GNorm = 0.4095, lr_0 = 3.7197e-04
Loss = 6.8434e-02, PNorm = 64.5901, GNorm = 0.6556, lr_0 = 3.7171e-04
Loss = 5.8877e-02, PNorm = 64.5970, GNorm = 0.5424, lr_0 = 3.7146e-04
Loss = 7.0412e-02, PNorm = 64.6067, GNorm = 1.0573, lr_0 = 3.7120e-04
Loss = 7.2638e-02, PNorm = 64.6178, GNorm = 0.5406, lr_0 = 3.7095e-04
Loss = 5.6546e-02, PNorm = 64.6256, GNorm = 0.6099, lr_0 = 3.7070e-04
Loss = 5.9125e-02, PNorm = 64.6333, GNorm = 0.6773, lr_0 = 3.7044e-04
Loss = 7.0081e-02, PNorm = 64.6423, GNorm = 0.7270, lr_0 = 3.7019e-04
Loss = 7.5010e-02, PNorm = 64.6517, GNorm = 0.7122, lr_0 = 3.6993e-04
Loss = 6.7703e-02, PNorm = 64.6602, GNorm = 0.6306, lr_0 = 3.6968e-04
Loss = 6.6640e-02, PNorm = 64.6679, GNorm = 0.7346, lr_0 = 3.6943e-04
Loss = 6.1236e-02, PNorm = 64.6757, GNorm = 0.8234, lr_0 = 3.6917e-04
Loss = 6.9340e-02, PNorm = 64.6845, GNorm = 0.5531, lr_0 = 3.6892e-04
Loss = 7.1644e-02, PNorm = 64.6943, GNorm = 0.8706, lr_0 = 3.6867e-04
Loss = 7.6850e-02, PNorm = 64.7056, GNorm = 1.1433, lr_0 = 3.6842e-04
Loss = 7.0775e-02, PNorm = 64.7149, GNorm = 0.9102, lr_0 = 3.6816e-04
Loss = 6.1317e-02, PNorm = 64.7204, GNorm = 0.5656, lr_0 = 3.6791e-04
Loss = 6.1209e-02, PNorm = 64.7277, GNorm = 0.4611, lr_0 = 3.6766e-04
Loss = 6.4262e-02, PNorm = 64.7368, GNorm = 0.4995, lr_0 = 3.6741e-04
Loss = 6.1197e-02, PNorm = 64.7431, GNorm = 0.4886, lr_0 = 3.6716e-04
Loss = 7.5855e-02, PNorm = 64.7494, GNorm = 1.1676, lr_0 = 3.6690e-04
Loss = 6.7577e-02, PNorm = 64.7572, GNorm = 0.4902, lr_0 = 3.6665e-04
Loss = 7.1477e-02, PNorm = 64.7646, GNorm = 0.7792, lr_0 = 3.6640e-04
Loss = 6.2248e-02, PNorm = 64.7712, GNorm = 0.5609, lr_0 = 3.6615e-04
Loss = 5.9069e-02, PNorm = 64.7770, GNorm = 0.5631, lr_0 = 3.6590e-04
Loss = 6.8970e-02, PNorm = 64.7827, GNorm = 0.5894, lr_0 = 3.6565e-04
Loss = 6.1114e-02, PNorm = 64.7907, GNorm = 0.4159, lr_0 = 3.6540e-04
Loss = 6.6744e-02, PNorm = 64.7985, GNorm = 0.6961, lr_0 = 3.6515e-04
Loss = 7.1868e-02, PNorm = 64.8040, GNorm = 0.6044, lr_0 = 3.6490e-04
Loss = 6.9142e-02, PNorm = 64.8119, GNorm = 0.6321, lr_0 = 3.6465e-04
Loss = 6.5376e-02, PNorm = 64.8167, GNorm = 0.4853, lr_0 = 3.6440e-04
Loss = 7.5673e-02, PNorm = 64.8228, GNorm = 0.6031, lr_0 = 3.6415e-04
Loss = 7.0258e-02, PNorm = 64.8306, GNorm = 0.6038, lr_0 = 3.6390e-04
Loss = 6.8498e-02, PNorm = 64.8436, GNorm = 0.6469, lr_0 = 3.6365e-04
Loss = 6.7197e-02, PNorm = 64.8546, GNorm = 0.9317, lr_0 = 3.6340e-04
Loss = 7.7605e-02, PNorm = 64.8627, GNorm = 0.9988, lr_0 = 3.6315e-04
Loss = 6.7899e-02, PNorm = 64.8721, GNorm = 0.5872, lr_0 = 3.6290e-04
Loss = 7.3544e-02, PNorm = 64.8790, GNorm = 0.9707, lr_0 = 3.6266e-04
Loss = 6.6189e-02, PNorm = 64.8855, GNorm = 0.6169, lr_0 = 3.6241e-04
Loss = 6.7096e-02, PNorm = 64.8926, GNorm = 0.8830, lr_0 = 3.6216e-04
Loss = 6.4036e-02, PNorm = 64.9023, GNorm = 0.7930, lr_0 = 3.6191e-04
Loss = 6.3793e-02, PNorm = 64.9100, GNorm = 1.1150, lr_0 = 3.6166e-04
Loss = 8.1820e-02, PNorm = 64.9186, GNorm = 0.6879, lr_0 = 3.6141e-04
Loss = 7.1942e-02, PNorm = 64.9265, GNorm = 0.6569, lr_0 = 3.6117e-04
Loss = 6.0562e-02, PNorm = 64.9331, GNorm = 0.5642, lr_0 = 3.6092e-04
Loss = 6.3912e-02, PNorm = 64.9397, GNorm = 0.8346, lr_0 = 3.6067e-04
Loss = 6.7287e-02, PNorm = 64.9483, GNorm = 0.6021, lr_0 = 3.6043e-04
Loss = 7.7277e-02, PNorm = 64.9576, GNorm = 0.7115, lr_0 = 3.6018e-04
Loss = 7.3774e-02, PNorm = 64.9689, GNorm = 0.5965, lr_0 = 3.5993e-04
Loss = 7.0563e-02, PNorm = 64.9761, GNorm = 0.6866, lr_0 = 3.5969e-04
Loss = 8.1499e-02, PNorm = 64.9848, GNorm = 0.6669, lr_0 = 3.5944e-04
Loss = 6.8456e-02, PNorm = 64.9941, GNorm = 0.5175, lr_0 = 3.5919e-04
Loss = 6.8955e-02, PNorm = 65.0010, GNorm = 0.5119, lr_0 = 3.5895e-04
Loss = 6.5026e-02, PNorm = 65.0071, GNorm = 1.1123, lr_0 = 3.5870e-04
Loss = 7.3483e-02, PNorm = 65.0167, GNorm = 0.4141, lr_0 = 3.5845e-04
Loss = 7.7183e-02, PNorm = 65.0260, GNorm = 0.8512, lr_0 = 3.5821e-04
Loss = 6.7763e-02, PNorm = 65.0343, GNorm = 0.4850, lr_0 = 3.5796e-04
Loss = 5.6992e-02, PNorm = 65.0394, GNorm = 0.8613, lr_0 = 3.5772e-04
Loss = 7.3139e-02, PNorm = 65.0419, GNorm = 0.5734, lr_0 = 3.5747e-04
Loss = 7.6335e-02, PNorm = 65.0471, GNorm = 0.4995, lr_0 = 3.5723e-04
Loss = 8.7815e-02, PNorm = 65.0546, GNorm = 0.4484, lr_0 = 3.5698e-04
Loss = 7.3301e-02, PNorm = 65.0644, GNorm = 0.9837, lr_0 = 3.5674e-04
Loss = 7.7991e-02, PNorm = 65.0740, GNorm = 0.5831, lr_0 = 3.5650e-04
Loss = 6.2878e-02, PNorm = 65.0825, GNorm = 0.6622, lr_0 = 3.5625e-04
Loss = 7.9080e-02, PNorm = 65.0895, GNorm = 0.4728, lr_0 = 3.5601e-04
Loss = 6.1113e-02, PNorm = 65.0972, GNorm = 0.4851, lr_0 = 3.5576e-04
Loss = 6.0663e-02, PNorm = 65.1027, GNorm = 0.6138, lr_0 = 3.5552e-04
Loss = 8.0383e-02, PNorm = 65.1071, GNorm = 0.9758, lr_0 = 3.5528e-04
Loss = 7.3018e-02, PNorm = 65.1166, GNorm = 0.4776, lr_0 = 3.5503e-04
Loss = 8.2931e-02, PNorm = 65.1281, GNorm = 1.2512, lr_0 = 3.5479e-04
Loss = 8.9392e-02, PNorm = 65.1343, GNorm = 1.0773, lr_0 = 3.5455e-04
Loss = 7.2646e-02, PNorm = 65.1413, GNorm = 0.4028, lr_0 = 3.5430e-04
Loss = 7.8254e-02, PNorm = 65.1478, GNorm = 0.4863, lr_0 = 3.5406e-04
Loss = 6.4168e-02, PNorm = 65.1560, GNorm = 0.7749, lr_0 = 3.5382e-04
Loss = 6.9858e-02, PNorm = 65.1610, GNorm = 0.6492, lr_0 = 3.5358e-04
Loss = 6.5218e-02, PNorm = 65.1689, GNorm = 0.5466, lr_0 = 3.5333e-04
Loss = 6.0677e-02, PNorm = 65.1781, GNorm = 0.3857, lr_0 = 3.5309e-04
Loss = 7.0911e-02, PNorm = 65.1847, GNorm = 0.9142, lr_0 = 3.5285e-04
Loss = 7.4696e-02, PNorm = 65.1928, GNorm = 0.5304, lr_0 = 3.5261e-04
Loss = 6.9004e-02, PNorm = 65.1986, GNorm = 0.6658, lr_0 = 3.5237e-04
Loss = 7.9155e-02, PNorm = 65.2035, GNorm = 0.6941, lr_0 = 3.5212e-04
Loss = 7.5574e-02, PNorm = 65.2121, GNorm = 0.7079, lr_0 = 3.5188e-04
Loss = 6.6189e-02, PNorm = 65.2208, GNorm = 0.6947, lr_0 = 3.5164e-04
Loss = 7.4714e-02, PNorm = 65.2268, GNorm = 0.7272, lr_0 = 3.5140e-04
Loss = 7.0754e-02, PNorm = 65.2346, GNorm = 0.8889, lr_0 = 3.5116e-04
Loss = 6.9086e-02, PNorm = 65.2424, GNorm = 0.6466, lr_0 = 3.5092e-04
Loss = 7.1047e-02, PNorm = 65.2496, GNorm = 0.7758, lr_0 = 3.5068e-04
Loss = 6.7053e-02, PNorm = 65.2537, GNorm = 0.7568, lr_0 = 3.5044e-04
Loss = 7.4931e-02, PNorm = 65.2603, GNorm = 0.5260, lr_0 = 3.5020e-04
Loss = 7.4508e-02, PNorm = 65.2678, GNorm = 0.6396, lr_0 = 3.4996e-04
Loss = 7.2400e-02, PNorm = 65.2764, GNorm = 0.4882, lr_0 = 3.4972e-04
Loss = 7.6578e-02, PNorm = 65.2869, GNorm = 0.5700, lr_0 = 3.4948e-04
Loss = 7.8528e-02, PNorm = 65.2931, GNorm = 0.8162, lr_0 = 3.4924e-04
Loss = 6.3322e-02, PNorm = 65.2991, GNorm = 0.4600, lr_0 = 3.4900e-04
Loss = 7.3000e-02, PNorm = 65.3040, GNorm = 0.7667, lr_0 = 3.4876e-04
Loss = 7.1902e-02, PNorm = 65.3073, GNorm = 0.6087, lr_0 = 3.4852e-04
Loss = 7.6991e-02, PNorm = 65.3158, GNorm = 0.6876, lr_0 = 3.4828e-04
Loss = 7.7854e-02, PNorm = 65.3224, GNorm = 0.6885, lr_0 = 3.4805e-04
Loss = 7.9010e-02, PNorm = 65.3286, GNorm = 0.5419, lr_0 = 3.4781e-04
Loss = 8.2232e-02, PNorm = 65.3349, GNorm = 0.7963, lr_0 = 3.4757e-04
Loss = 6.7322e-02, PNorm = 65.3425, GNorm = 0.7749, lr_0 = 3.4733e-04
Loss = 6.6150e-02, PNorm = 65.3518, GNorm = 0.5316, lr_0 = 3.4709e-04
Loss = 7.8789e-02, PNorm = 65.3589, GNorm = 0.5145, lr_0 = 3.4686e-04
Loss = 7.2176e-02, PNorm = 65.3651, GNorm = 0.7883, lr_0 = 3.4662e-04
Loss = 6.9849e-02, PNorm = 65.3731, GNorm = 0.5682, lr_0 = 3.4638e-04
Loss = 8.0298e-02, PNorm = 65.3815, GNorm = 0.9697, lr_0 = 3.4614e-04
Loss = 6.9971e-02, PNorm = 65.3897, GNorm = 0.5200, lr_0 = 3.4591e-04
Loss = 8.3994e-02, PNorm = 65.3960, GNorm = 0.8098, lr_0 = 3.4567e-04
Loss = 7.4531e-02, PNorm = 65.4026, GNorm = 1.0149, lr_0 = 3.4543e-04
Loss = 8.0421e-02, PNorm = 65.4099, GNorm = 0.5561, lr_0 = 3.4520e-04
Loss = 7.4384e-02, PNorm = 65.4191, GNorm = 0.6225, lr_0 = 3.4496e-04
Loss = 6.2496e-02, PNorm = 65.4270, GNorm = 0.7399, lr_0 = 3.4472e-04
Loss = 6.6194e-02, PNorm = 65.4334, GNorm = 0.7192, lr_0 = 3.4449e-04
Loss = 7.5414e-02, PNorm = 65.4383, GNorm = 0.4009, lr_0 = 3.4425e-04
Loss = 6.5452e-02, PNorm = 65.4485, GNorm = 0.6457, lr_0 = 3.4402e-04
Loss = 7.1134e-02, PNorm = 65.4559, GNorm = 0.4961, lr_0 = 3.4378e-04
Loss = 6.6951e-02, PNorm = 65.4612, GNorm = 0.4984, lr_0 = 3.4354e-04
Loss = 7.7999e-02, PNorm = 65.4667, GNorm = 0.9789, lr_0 = 3.4331e-04
Validation mae = 0.403642
Epoch 15
Loss = 6.2203e-02, PNorm = 65.4750, GNorm = 0.5010, lr_0 = 3.4307e-04
Loss = 6.0027e-02, PNorm = 65.4826, GNorm = 0.6890, lr_0 = 3.4284e-04
Loss = 5.9466e-02, PNorm = 65.4887, GNorm = 0.6182, lr_0 = 3.4260e-04
Loss = 6.5533e-02, PNorm = 65.4938, GNorm = 0.4935, lr_0 = 3.4237e-04
Loss = 6.4699e-02, PNorm = 65.4992, GNorm = 0.5471, lr_0 = 3.4213e-04
Loss = 6.4980e-02, PNorm = 65.5064, GNorm = 0.4383, lr_0 = 3.4190e-04
Loss = 6.4841e-02, PNorm = 65.5165, GNorm = 0.5372, lr_0 = 3.4167e-04
Loss = 7.1469e-02, PNorm = 65.5255, GNorm = 0.6015, lr_0 = 3.4143e-04
Loss = 7.6645e-02, PNorm = 65.5341, GNorm = 0.5705, lr_0 = 3.4120e-04
Loss = 6.5748e-02, PNorm = 65.5433, GNorm = 1.2575, lr_0 = 3.4096e-04
Loss = 6.1436e-02, PNorm = 65.5482, GNorm = 0.5437, lr_0 = 3.4073e-04
Loss = 7.4931e-02, PNorm = 65.5533, GNorm = 0.8435, lr_0 = 3.4050e-04
Loss = 5.3918e-02, PNorm = 65.5614, GNorm = 0.5769, lr_0 = 3.4026e-04
Loss = 7.0303e-02, PNorm = 65.5711, GNorm = 0.6468, lr_0 = 3.4003e-04
Loss = 6.6529e-02, PNorm = 65.5809, GNorm = 0.7708, lr_0 = 3.3980e-04
Loss = 6.1615e-02, PNorm = 65.5884, GNorm = 0.5373, lr_0 = 3.3956e-04
Loss = 6.0491e-02, PNorm = 65.5944, GNorm = 0.3851, lr_0 = 3.3933e-04
Loss = 5.9719e-02, PNorm = 65.6021, GNorm = 0.7274, lr_0 = 3.3910e-04
Loss = 7.2801e-02, PNorm = 65.6096, GNorm = 0.5288, lr_0 = 3.3887e-04
Loss = 5.7474e-02, PNorm = 65.6150, GNorm = 0.5543, lr_0 = 3.3864e-04
Loss = 5.2739e-02, PNorm = 65.6203, GNorm = 0.5625, lr_0 = 3.3840e-04
Loss = 6.7134e-02, PNorm = 65.6274, GNorm = 0.6374, lr_0 = 3.3817e-04
Loss = 6.0681e-02, PNorm = 65.6359, GNorm = 0.5988, lr_0 = 3.3794e-04
Loss = 5.8456e-02, PNorm = 65.6448, GNorm = 0.5836, lr_0 = 3.3771e-04
Loss = 6.6681e-02, PNorm = 65.6520, GNorm = 0.5425, lr_0 = 3.3748e-04
Loss = 6.7526e-02, PNorm = 65.6597, GNorm = 0.6432, lr_0 = 3.3725e-04
Loss = 7.9446e-02, PNorm = 65.6688, GNorm = 0.8024, lr_0 = 3.3701e-04
Loss = 7.1606e-02, PNorm = 65.6786, GNorm = 0.6165, lr_0 = 3.3678e-04
Loss = 6.3833e-02, PNorm = 65.6857, GNorm = 0.7580, lr_0 = 3.3655e-04
Loss = 6.4261e-02, PNorm = 65.6906, GNorm = 0.5548, lr_0 = 3.3632e-04
Loss = 6.6292e-02, PNorm = 65.6971, GNorm = 0.5359, lr_0 = 3.3609e-04
Loss = 5.5475e-02, PNorm = 65.7039, GNorm = 0.6403, lr_0 = 3.3586e-04
Loss = 6.0595e-02, PNorm = 65.7104, GNorm = 0.3373, lr_0 = 3.3563e-04
Loss = 6.5603e-02, PNorm = 65.7163, GNorm = 0.6062, lr_0 = 3.3540e-04
Loss = 6.9357e-02, PNorm = 65.7234, GNorm = 0.8630, lr_0 = 3.3517e-04
Loss = 6.3771e-02, PNorm = 65.7283, GNorm = 0.4749, lr_0 = 3.3494e-04
Loss = 6.5339e-02, PNorm = 65.7322, GNorm = 0.5514, lr_0 = 3.3471e-04
Loss = 6.1289e-02, PNorm = 65.7392, GNorm = 0.9242, lr_0 = 3.3448e-04
Loss = 6.2622e-02, PNorm = 65.7434, GNorm = 0.5182, lr_0 = 3.3425e-04
Loss = 6.6510e-02, PNorm = 65.7489, GNorm = 0.5055, lr_0 = 3.3403e-04
Loss = 5.8474e-02, PNorm = 65.7538, GNorm = 0.8223, lr_0 = 3.3380e-04
Loss = 7.2416e-02, PNorm = 65.7608, GNorm = 0.8270, lr_0 = 3.3357e-04
Loss = 6.3427e-02, PNorm = 65.7689, GNorm = 0.4916, lr_0 = 3.3334e-04
Loss = 6.8340e-02, PNorm = 65.7749, GNorm = 0.7550, lr_0 = 3.3311e-04
Loss = 5.8495e-02, PNorm = 65.7808, GNorm = 0.6055, lr_0 = 3.3288e-04
Loss = 6.0940e-02, PNorm = 65.7843, GNorm = 0.4259, lr_0 = 3.3265e-04
Loss = 7.7297e-02, PNorm = 65.7905, GNorm = 0.6631, lr_0 = 3.3243e-04
Loss = 7.3571e-02, PNorm = 65.7971, GNorm = 0.4712, lr_0 = 3.3220e-04
Loss = 5.5608e-02, PNorm = 65.8031, GNorm = 0.7746, lr_0 = 3.3197e-04
Loss = 6.9558e-02, PNorm = 65.8118, GNorm = 0.4616, lr_0 = 3.3174e-04
Loss = 6.7473e-02, PNorm = 65.8156, GNorm = 0.4831, lr_0 = 3.3152e-04
Loss = 7.5820e-02, PNorm = 65.8194, GNorm = 0.4263, lr_0 = 3.3129e-04
Loss = 5.7059e-02, PNorm = 65.8251, GNorm = 0.5188, lr_0 = 3.3106e-04
Loss = 7.0020e-02, PNorm = 65.8303, GNorm = 0.7614, lr_0 = 3.3084e-04
Loss = 6.6397e-02, PNorm = 65.8396, GNorm = 0.7650, lr_0 = 3.3061e-04
Loss = 6.5343e-02, PNorm = 65.8465, GNorm = 0.8166, lr_0 = 3.3038e-04
Loss = 5.4176e-02, PNorm = 65.8502, GNorm = 0.6648, lr_0 = 3.3016e-04
Loss = 6.9435e-02, PNorm = 65.8568, GNorm = 0.4502, lr_0 = 3.2993e-04
Loss = 6.7639e-02, PNorm = 65.8629, GNorm = 0.5480, lr_0 = 3.2970e-04
Loss = 7.4227e-02, PNorm = 65.8690, GNorm = 0.7499, lr_0 = 3.2948e-04
Loss = 6.5019e-02, PNorm = 65.8755, GNorm = 0.8818, lr_0 = 3.2925e-04
Loss = 6.3954e-02, PNorm = 65.8818, GNorm = 0.9577, lr_0 = 3.2903e-04
Loss = 6.9146e-02, PNorm = 65.8884, GNorm = 0.8141, lr_0 = 3.2880e-04
Loss = 6.2241e-02, PNorm = 65.8957, GNorm = 0.6887, lr_0 = 3.2858e-04
Loss = 7.0879e-02, PNorm = 65.9049, GNorm = 1.1320, lr_0 = 3.2835e-04
Loss = 6.0953e-02, PNorm = 65.9139, GNorm = 0.5305, lr_0 = 3.2813e-04
Loss = 7.3611e-02, PNorm = 65.9221, GNorm = 0.5053, lr_0 = 3.2790e-04
Loss = 6.6290e-02, PNorm = 65.9264, GNorm = 0.6118, lr_0 = 3.2768e-04
Loss = 6.8674e-02, PNorm = 65.9315, GNorm = 0.5825, lr_0 = 3.2745e-04
Loss = 6.2549e-02, PNorm = 65.9364, GNorm = 0.5292, lr_0 = 3.2723e-04
Loss = 7.5416e-02, PNorm = 65.9410, GNorm = 0.6513, lr_0 = 3.2700e-04
Loss = 6.8104e-02, PNorm = 65.9455, GNorm = 0.4505, lr_0 = 3.2678e-04
Loss = 6.9026e-02, PNorm = 65.9506, GNorm = 0.6726, lr_0 = 3.2656e-04
Loss = 6.6822e-02, PNorm = 65.9558, GNorm = 1.0759, lr_0 = 3.2633e-04
Loss = 6.7828e-02, PNorm = 65.9599, GNorm = 0.9824, lr_0 = 3.2611e-04
Loss = 6.2309e-02, PNorm = 65.9651, GNorm = 0.8195, lr_0 = 3.2589e-04
Loss = 7.0043e-02, PNorm = 65.9701, GNorm = 0.6648, lr_0 = 3.2566e-04
Loss = 6.6742e-02, PNorm = 65.9774, GNorm = 1.0832, lr_0 = 3.2544e-04
Loss = 5.7995e-02, PNorm = 65.9837, GNorm = 0.5055, lr_0 = 3.2522e-04
Loss = 6.7606e-02, PNorm = 65.9875, GNorm = 0.6066, lr_0 = 3.2499e-04
Loss = 6.2559e-02, PNorm = 65.9921, GNorm = 0.7019, lr_0 = 3.2477e-04
Loss = 6.6731e-02, PNorm = 65.9974, GNorm = 0.7175, lr_0 = 3.2455e-04
Loss = 7.3684e-02, PNorm = 66.0024, GNorm = 0.7348, lr_0 = 3.2433e-04
Loss = 5.1932e-02, PNorm = 66.0042, GNorm = 0.6430, lr_0 = 3.2410e-04
Loss = 6.5517e-02, PNorm = 66.0096, GNorm = 0.5505, lr_0 = 3.2388e-04
Loss = 6.4907e-02, PNorm = 66.0179, GNorm = 0.5519, lr_0 = 3.2366e-04
Loss = 6.9073e-02, PNorm = 66.0248, GNorm = 0.5965, lr_0 = 3.2344e-04
Loss = 6.3209e-02, PNorm = 66.0286, GNorm = 0.5169, lr_0 = 3.2322e-04
Loss = 6.8061e-02, PNorm = 66.0339, GNorm = 0.5685, lr_0 = 3.2300e-04
Loss = 6.4876e-02, PNorm = 66.0412, GNorm = 0.9599, lr_0 = 3.2277e-04
Loss = 7.0794e-02, PNorm = 66.0489, GNorm = 0.5956, lr_0 = 3.2255e-04
Loss = 5.6014e-02, PNorm = 66.0528, GNorm = 0.3614, lr_0 = 3.2233e-04
Loss = 6.0965e-02, PNorm = 66.0574, GNorm = 0.5991, lr_0 = 3.2211e-04
Loss = 6.9873e-02, PNorm = 66.0620, GNorm = 0.4739, lr_0 = 3.2189e-04
Loss = 6.5798e-02, PNorm = 66.0693, GNorm = 0.6952, lr_0 = 3.2167e-04
Loss = 7.3405e-02, PNorm = 66.0762, GNorm = 0.6940, lr_0 = 3.2145e-04
Loss = 6.0385e-02, PNorm = 66.0817, GNorm = 0.5607, lr_0 = 3.2123e-04
Loss = 6.9081e-02, PNorm = 66.0890, GNorm = 0.6704, lr_0 = 3.2101e-04
Loss = 7.3066e-02, PNorm = 66.0967, GNorm = 0.5989, lr_0 = 3.2079e-04
Loss = 6.2806e-02, PNorm = 66.1048, GNorm = 0.4352, lr_0 = 3.2057e-04
Loss = 6.8245e-02, PNorm = 66.1103, GNorm = 0.6083, lr_0 = 3.2035e-04
Loss = 5.6522e-02, PNorm = 66.1173, GNorm = 0.6657, lr_0 = 3.2013e-04
Loss = 6.1849e-02, PNorm = 66.1236, GNorm = 0.4915, lr_0 = 3.1991e-04
Loss = 6.4371e-02, PNorm = 66.1300, GNorm = 0.8272, lr_0 = 3.1969e-04
Loss = 6.1702e-02, PNorm = 66.1362, GNorm = 0.4884, lr_0 = 3.1947e-04
Loss = 7.0906e-02, PNorm = 66.1426, GNorm = 0.8639, lr_0 = 3.1925e-04
Loss = 6.9633e-02, PNorm = 66.1489, GNorm = 0.4801, lr_0 = 3.1904e-04
Loss = 7.1371e-02, PNorm = 66.1561, GNorm = 0.5865, lr_0 = 3.1882e-04
Loss = 7.0217e-02, PNorm = 66.1666, GNorm = 0.5188, lr_0 = 3.1860e-04
Loss = 6.5685e-02, PNorm = 66.1750, GNorm = 0.7027, lr_0 = 3.1838e-04
Loss = 7.1291e-02, PNorm = 66.1812, GNorm = 0.8494, lr_0 = 3.1816e-04
Loss = 6.9972e-02, PNorm = 66.1871, GNorm = 0.7748, lr_0 = 3.1794e-04
Loss = 7.2813e-02, PNorm = 66.1928, GNorm = 0.5809, lr_0 = 3.1773e-04
Loss = 7.2550e-02, PNorm = 66.2025, GNorm = 0.7024, lr_0 = 3.1751e-04
Loss = 7.3123e-02, PNorm = 66.2103, GNorm = 0.6625, lr_0 = 3.1729e-04
Loss = 6.0867e-02, PNorm = 66.2184, GNorm = 0.4551, lr_0 = 3.1707e-04
Loss = 6.6841e-02, PNorm = 66.2232, GNorm = 0.6384, lr_0 = 3.1686e-04
Loss = 7.6450e-02, PNorm = 66.2303, GNorm = 1.0221, lr_0 = 3.1664e-04
Loss = 7.1573e-02, PNorm = 66.2374, GNorm = 0.6443, lr_0 = 3.1642e-04
Loss = 6.4820e-02, PNorm = 66.2455, GNorm = 0.9012, lr_0 = 3.1621e-04
Validation mae = 0.388750
Epoch 16
Loss = 5.5728e-02, PNorm = 66.2512, GNorm = 0.7799, lr_0 = 3.1599e-04
Loss = 6.0754e-02, PNorm = 66.2566, GNorm = 0.4608, lr_0 = 3.1577e-04
Loss = 5.6833e-02, PNorm = 66.2624, GNorm = 0.5461, lr_0 = 3.1556e-04
Loss = 5.3836e-02, PNorm = 66.2680, GNorm = 0.4514, lr_0 = 3.1534e-04
Loss = 5.5363e-02, PNorm = 66.2733, GNorm = 0.4413, lr_0 = 3.1512e-04
Loss = 6.3429e-02, PNorm = 66.2812, GNorm = 0.5256, lr_0 = 3.1491e-04
Loss = 5.9675e-02, PNorm = 66.2871, GNorm = 0.6307, lr_0 = 3.1469e-04
Loss = 5.4460e-02, PNorm = 66.2923, GNorm = 0.4688, lr_0 = 3.1448e-04
Loss = 5.2989e-02, PNorm = 66.2985, GNorm = 0.7868, lr_0 = 3.1426e-04
Loss = 6.1430e-02, PNorm = 66.3052, GNorm = 0.6126, lr_0 = 3.1405e-04
Loss = 5.0662e-02, PNorm = 66.3119, GNorm = 0.6907, lr_0 = 3.1383e-04
Loss = 5.4857e-02, PNorm = 66.3159, GNorm = 0.5304, lr_0 = 3.1362e-04
Loss = 5.4488e-02, PNorm = 66.3201, GNorm = 0.3694, lr_0 = 3.1340e-04
Loss = 6.6897e-02, PNorm = 66.3249, GNorm = 0.3827, lr_0 = 3.1319e-04
Loss = 5.3878e-02, PNorm = 66.3306, GNorm = 0.6522, lr_0 = 3.1297e-04
Loss = 5.3893e-02, PNorm = 66.3357, GNorm = 0.8488, lr_0 = 3.1276e-04
Loss = 5.4641e-02, PNorm = 66.3423, GNorm = 0.6145, lr_0 = 3.1254e-04
Loss = 6.9070e-02, PNorm = 66.3498, GNorm = 0.5843, lr_0 = 3.1233e-04
Loss = 5.3626e-02, PNorm = 66.3558, GNorm = 0.5108, lr_0 = 3.1212e-04
Loss = 5.8400e-02, PNorm = 66.3628, GNorm = 0.5882, lr_0 = 3.1190e-04
Loss = 6.5017e-02, PNorm = 66.3700, GNorm = 0.6311, lr_0 = 3.1169e-04
Loss = 6.1896e-02, PNorm = 66.3766, GNorm = 0.5194, lr_0 = 3.1147e-04
Loss = 5.3887e-02, PNorm = 66.3820, GNorm = 0.4783, lr_0 = 3.1126e-04
Loss = 6.4640e-02, PNorm = 66.3908, GNorm = 0.6687, lr_0 = 3.1105e-04
Loss = 5.4694e-02, PNorm = 66.3995, GNorm = 0.6355, lr_0 = 3.1083e-04
Loss = 5.9606e-02, PNorm = 66.4055, GNorm = 0.5195, lr_0 = 3.1062e-04
Loss = 6.5121e-02, PNorm = 66.4104, GNorm = 0.6239, lr_0 = 3.1041e-04
Loss = 5.8447e-02, PNorm = 66.4157, GNorm = 0.5469, lr_0 = 3.1020e-04
Loss = 6.3699e-02, PNorm = 66.4202, GNorm = 0.9105, lr_0 = 3.0998e-04
Loss = 6.9156e-02, PNorm = 66.4265, GNorm = 0.7908, lr_0 = 3.0977e-04
Loss = 6.6241e-02, PNorm = 66.4340, GNorm = 0.5769, lr_0 = 3.0956e-04
Loss = 5.6636e-02, PNorm = 66.4390, GNorm = 0.6230, lr_0 = 3.0935e-04
Loss = 6.0710e-02, PNorm = 66.4454, GNorm = 0.5323, lr_0 = 3.0914e-04
Loss = 6.2390e-02, PNorm = 66.4535, GNorm = 0.5118, lr_0 = 3.0892e-04
Loss = 6.2070e-02, PNorm = 66.4601, GNorm = 0.5709, lr_0 = 3.0871e-04
Loss = 6.6396e-02, PNorm = 66.4691, GNorm = 0.9645, lr_0 = 3.0850e-04
Loss = 6.0741e-02, PNorm = 66.4758, GNorm = 0.5521, lr_0 = 3.0829e-04
Loss = 5.8409e-02, PNorm = 66.4803, GNorm = 0.8337, lr_0 = 3.0808e-04
Loss = 5.6066e-02, PNorm = 66.4862, GNorm = 0.6170, lr_0 = 3.0787e-04
Loss = 6.2852e-02, PNorm = 66.4928, GNorm = 0.7230, lr_0 = 3.0766e-04
Loss = 6.2535e-02, PNorm = 66.4995, GNorm = 0.4929, lr_0 = 3.0745e-04
Loss = 6.0991e-02, PNorm = 66.5040, GNorm = 0.6122, lr_0 = 3.0723e-04
Loss = 5.9395e-02, PNorm = 66.5099, GNorm = 0.4764, lr_0 = 3.0702e-04
Loss = 5.7337e-02, PNorm = 66.5195, GNorm = 0.7501, lr_0 = 3.0681e-04
Loss = 5.7665e-02, PNorm = 66.5269, GNorm = 0.4742, lr_0 = 3.0660e-04
Loss = 6.3213e-02, PNorm = 66.5320, GNorm = 0.7923, lr_0 = 3.0639e-04
Loss = 5.8797e-02, PNorm = 66.5400, GNorm = 0.4927, lr_0 = 3.0618e-04
Loss = 6.6475e-02, PNorm = 66.5461, GNorm = 0.5336, lr_0 = 3.0597e-04
Loss = 5.5512e-02, PNorm = 66.5518, GNorm = 0.7080, lr_0 = 3.0576e-04
Loss = 5.7792e-02, PNorm = 66.5571, GNorm = 0.5544, lr_0 = 3.0555e-04
Loss = 6.4936e-02, PNorm = 66.5589, GNorm = 0.3454, lr_0 = 3.0535e-04
Loss = 7.1712e-02, PNorm = 66.5637, GNorm = 0.7264, lr_0 = 3.0514e-04
Loss = 5.9172e-02, PNorm = 66.5732, GNorm = 0.4017, lr_0 = 3.0493e-04
Loss = 6.0890e-02, PNorm = 66.5828, GNorm = 0.6147, lr_0 = 3.0472e-04
Loss = 6.1666e-02, PNorm = 66.5900, GNorm = 0.8350, lr_0 = 3.0451e-04
Loss = 6.0504e-02, PNorm = 66.5958, GNorm = 0.4882, lr_0 = 3.0430e-04
Loss = 6.0434e-02, PNorm = 66.6000, GNorm = 0.5563, lr_0 = 3.0409e-04
Loss = 6.4190e-02, PNorm = 66.6060, GNorm = 0.9289, lr_0 = 3.0388e-04
Loss = 6.5242e-02, PNorm = 66.6094, GNorm = 0.9376, lr_0 = 3.0368e-04
Loss = 6.1527e-02, PNorm = 66.6157, GNorm = 0.5897, lr_0 = 3.0347e-04
Loss = 6.5053e-02, PNorm = 66.6205, GNorm = 0.6694, lr_0 = 3.0326e-04
Loss = 5.2546e-02, PNorm = 66.6263, GNorm = 0.7375, lr_0 = 3.0305e-04
Loss = 7.2845e-02, PNorm = 66.6307, GNorm = 0.6451, lr_0 = 3.0284e-04
Loss = 6.8177e-02, PNorm = 66.6350, GNorm = 1.0748, lr_0 = 3.0264e-04
Loss = 6.1339e-02, PNorm = 66.6413, GNorm = 0.6026, lr_0 = 3.0243e-04
Loss = 6.6053e-02, PNorm = 66.6464, GNorm = 0.7093, lr_0 = 3.0222e-04
Loss = 5.9551e-02, PNorm = 66.6558, GNorm = 0.4733, lr_0 = 3.0202e-04
Loss = 5.2438e-02, PNorm = 66.6598, GNorm = 0.4668, lr_0 = 3.0181e-04
Loss = 5.6674e-02, PNorm = 66.6623, GNorm = 0.5223, lr_0 = 3.0160e-04
Loss = 6.3118e-02, PNorm = 66.6676, GNorm = 0.7216, lr_0 = 3.0140e-04
Loss = 6.8706e-02, PNorm = 66.6735, GNorm = 0.3855, lr_0 = 3.0119e-04
Loss = 6.0615e-02, PNorm = 66.6796, GNorm = 0.6074, lr_0 = 3.0098e-04
Loss = 6.2919e-02, PNorm = 66.6849, GNorm = 0.7568, lr_0 = 3.0078e-04
Loss = 6.0485e-02, PNorm = 66.6926, GNorm = 0.4617, lr_0 = 3.0057e-04
Loss = 7.2879e-02, PNorm = 66.7002, GNorm = 0.5706, lr_0 = 3.0036e-04
Loss = 6.4295e-02, PNorm = 66.7044, GNorm = 0.9811, lr_0 = 3.0016e-04
Loss = 5.8339e-02, PNorm = 66.7070, GNorm = 0.6866, lr_0 = 2.9995e-04
Loss = 6.3045e-02, PNorm = 66.7107, GNorm = 0.4804, lr_0 = 2.9975e-04
Loss = 5.8934e-02, PNorm = 66.7136, GNorm = 0.4869, lr_0 = 2.9954e-04
Loss = 6.3027e-02, PNorm = 66.7184, GNorm = 0.6664, lr_0 = 2.9934e-04
Loss = 5.4733e-02, PNorm = 66.7229, GNorm = 0.4504, lr_0 = 2.9913e-04
Loss = 7.0897e-02, PNorm = 66.7269, GNorm = 0.5105, lr_0 = 2.9893e-04
Loss = 7.3797e-02, PNorm = 66.7311, GNorm = 0.5972, lr_0 = 2.9872e-04
Loss = 6.5780e-02, PNorm = 66.7379, GNorm = 0.6666, lr_0 = 2.9852e-04
Loss = 6.1438e-02, PNorm = 66.7437, GNorm = 0.5061, lr_0 = 2.9831e-04
Loss = 6.0968e-02, PNorm = 66.7506, GNorm = 0.5731, lr_0 = 2.9811e-04
Loss = 5.7150e-02, PNorm = 66.7569, GNorm = 0.7422, lr_0 = 2.9790e-04
Loss = 6.4743e-02, PNorm = 66.7625, GNorm = 0.4407, lr_0 = 2.9770e-04
Loss = 5.3492e-02, PNorm = 66.7688, GNorm = 0.5228, lr_0 = 2.9750e-04
Loss = 6.8216e-02, PNorm = 66.7746, GNorm = 0.5882, lr_0 = 2.9729e-04
Loss = 6.2814e-02, PNorm = 66.7790, GNorm = 0.3820, lr_0 = 2.9709e-04
Loss = 6.6560e-02, PNorm = 66.7818, GNorm = 0.7738, lr_0 = 2.9689e-04
Loss = 6.4719e-02, PNorm = 66.7882, GNorm = 0.6881, lr_0 = 2.9668e-04
Loss = 5.6998e-02, PNorm = 66.7990, GNorm = 0.4200, lr_0 = 2.9648e-04
Loss = 6.0252e-02, PNorm = 66.8059, GNorm = 0.5308, lr_0 = 2.9628e-04
Loss = 5.9541e-02, PNorm = 66.8116, GNorm = 0.4583, lr_0 = 2.9607e-04
Loss = 7.1339e-02, PNorm = 66.8186, GNorm = 0.7086, lr_0 = 2.9587e-04
Loss = 6.0653e-02, PNorm = 66.8240, GNorm = 0.5588, lr_0 = 2.9567e-04
Loss = 6.2979e-02, PNorm = 66.8301, GNorm = 0.6988, lr_0 = 2.9546e-04
Loss = 5.5448e-02, PNorm = 66.8354, GNorm = 0.4311, lr_0 = 2.9526e-04
Loss = 5.7633e-02, PNorm = 66.8399, GNorm = 0.4770, lr_0 = 2.9506e-04
Loss = 6.2938e-02, PNorm = 66.8427, GNorm = 0.5354, lr_0 = 2.9486e-04
Loss = 5.4113e-02, PNorm = 66.8467, GNorm = 0.5663, lr_0 = 2.9466e-04
Loss = 6.6514e-02, PNorm = 66.8519, GNorm = 0.5552, lr_0 = 2.9445e-04
Loss = 6.2190e-02, PNorm = 66.8569, GNorm = 0.7985, lr_0 = 2.9425e-04
Loss = 6.8944e-02, PNorm = 66.8631, GNorm = 0.5243, lr_0 = 2.9405e-04
Loss = 5.8836e-02, PNorm = 66.8699, GNorm = 0.4684, lr_0 = 2.9385e-04
Loss = 5.9989e-02, PNorm = 66.8755, GNorm = 1.0285, lr_0 = 2.9365e-04
Loss = 6.6301e-02, PNorm = 66.8812, GNorm = 0.8147, lr_0 = 2.9345e-04
Loss = 5.9566e-02, PNorm = 66.8845, GNorm = 0.5587, lr_0 = 2.9325e-04
Loss = 5.8609e-02, PNorm = 66.8892, GNorm = 0.7266, lr_0 = 2.9305e-04
Loss = 5.8807e-02, PNorm = 66.8930, GNorm = 0.6743, lr_0 = 2.9284e-04
Loss = 6.7014e-02, PNorm = 66.8971, GNorm = 0.5085, lr_0 = 2.9264e-04
Loss = 6.9933e-02, PNorm = 66.9018, GNorm = 0.9079, lr_0 = 2.9244e-04
Loss = 6.7363e-02, PNorm = 66.9090, GNorm = 0.8962, lr_0 = 2.9224e-04
Loss = 6.9490e-02, PNorm = 66.9165, GNorm = 0.6693, lr_0 = 2.9204e-04
Loss = 6.9748e-02, PNorm = 66.9210, GNorm = 0.7590, lr_0 = 2.9184e-04
Loss = 6.5183e-02, PNorm = 66.9239, GNorm = 0.5429, lr_0 = 2.9164e-04
Loss = 7.0346e-02, PNorm = 66.9311, GNorm = 0.5034, lr_0 = 2.9144e-04
Loss = 6.2622e-02, PNorm = 66.9384, GNorm = 0.4679, lr_0 = 2.9124e-04
Validation mae = 0.394836
Epoch 17
Loss = 4.7103e-02, PNorm = 66.9448, GNorm = 0.5709, lr_0 = 2.9104e-04
Loss = 6.3565e-02, PNorm = 66.9518, GNorm = 0.5575, lr_0 = 2.9084e-04
Loss = 5.2181e-02, PNorm = 66.9549, GNorm = 0.5302, lr_0 = 2.9065e-04
Loss = 5.8806e-02, PNorm = 66.9574, GNorm = 0.6900, lr_0 = 2.9045e-04
Loss = 5.1784e-02, PNorm = 66.9604, GNorm = 0.4940, lr_0 = 2.9025e-04
Loss = 4.9412e-02, PNorm = 66.9652, GNorm = 0.4281, lr_0 = 2.9005e-04
Loss = 5.8386e-02, PNorm = 66.9706, GNorm = 0.6310, lr_0 = 2.8985e-04
Loss = 5.1230e-02, PNorm = 66.9775, GNorm = 0.7962, lr_0 = 2.8965e-04
Loss = 5.3621e-02, PNorm = 66.9833, GNorm = 0.8981, lr_0 = 2.8945e-04
Loss = 5.4457e-02, PNorm = 66.9920, GNorm = 0.5557, lr_0 = 2.8925e-04
Loss = 5.2785e-02, PNorm = 67.0009, GNorm = 1.0131, lr_0 = 2.8906e-04
Loss = 6.1065e-02, PNorm = 67.0065, GNorm = 0.5700, lr_0 = 2.8886e-04
Loss = 5.1057e-02, PNorm = 67.0148, GNorm = 0.5763, lr_0 = 2.8866e-04
Loss = 5.6197e-02, PNorm = 67.0197, GNorm = 1.1093, lr_0 = 2.8846e-04
Loss = 6.1292e-02, PNorm = 67.0227, GNorm = 0.4785, lr_0 = 2.8826e-04
Loss = 5.5233e-02, PNorm = 67.0271, GNorm = 0.5238, lr_0 = 2.8807e-04
Loss = 6.4180e-02, PNorm = 67.0309, GNorm = 0.7511, lr_0 = 2.8787e-04
Loss = 5.2976e-02, PNorm = 67.0351, GNorm = 0.5521, lr_0 = 2.8767e-04
Loss = 6.2431e-02, PNorm = 67.0425, GNorm = 0.7747, lr_0 = 2.8748e-04
Loss = 5.2256e-02, PNorm = 67.0465, GNorm = 0.7484, lr_0 = 2.8728e-04
Loss = 5.9338e-02, PNorm = 67.0521, GNorm = 0.8387, lr_0 = 2.8708e-04
Loss = 5.8695e-02, PNorm = 67.0579, GNorm = 0.4982, lr_0 = 2.8689e-04
Loss = 5.6273e-02, PNorm = 67.0609, GNorm = 0.5639, lr_0 = 2.8669e-04
Loss = 5.3469e-02, PNorm = 67.0643, GNorm = 0.7081, lr_0 = 2.8649e-04
Loss = 5.9303e-02, PNorm = 67.0686, GNorm = 0.5092, lr_0 = 2.8630e-04
Loss = 5.2412e-02, PNorm = 67.0734, GNorm = 0.5282, lr_0 = 2.8610e-04
Loss = 4.9558e-02, PNorm = 67.0779, GNorm = 0.4288, lr_0 = 2.8590e-04
Loss = 4.8471e-02, PNorm = 67.0806, GNorm = 0.4872, lr_0 = 2.8571e-04
Loss = 6.8451e-02, PNorm = 67.0844, GNorm = 0.4768, lr_0 = 2.8551e-04
Loss = 5.9108e-02, PNorm = 67.0915, GNorm = 0.9203, lr_0 = 2.8532e-04
Loss = 4.6145e-02, PNorm = 67.0960, GNorm = 0.6088, lr_0 = 2.8512e-04
Loss = 5.6915e-02, PNorm = 67.0976, GNorm = 0.4587, lr_0 = 2.8493e-04
Loss = 5.8997e-02, PNorm = 67.1017, GNorm = 0.6385, lr_0 = 2.8473e-04
Loss = 4.9337e-02, PNorm = 67.1051, GNorm = 0.6301, lr_0 = 2.8454e-04
Loss = 6.2268e-02, PNorm = 67.1075, GNorm = 0.4955, lr_0 = 2.8434e-04
Loss = 5.7829e-02, PNorm = 67.1141, GNorm = 0.6249, lr_0 = 2.8415e-04
Loss = 5.1725e-02, PNorm = 67.1193, GNorm = 0.9843, lr_0 = 2.8395e-04
Loss = 5.7138e-02, PNorm = 67.1238, GNorm = 0.6155, lr_0 = 2.8376e-04
Loss = 5.5685e-02, PNorm = 67.1284, GNorm = 0.6242, lr_0 = 2.8356e-04
Loss = 6.0526e-02, PNorm = 67.1334, GNorm = 0.5339, lr_0 = 2.8337e-04
Loss = 6.2481e-02, PNorm = 67.1425, GNorm = 0.7332, lr_0 = 2.8317e-04
Loss = 5.9480e-02, PNorm = 67.1513, GNorm = 0.4163, lr_0 = 2.8298e-04
Loss = 5.9118e-02, PNorm = 67.1576, GNorm = 0.5255, lr_0 = 2.8279e-04
Loss = 6.2511e-02, PNorm = 67.1631, GNorm = 0.4811, lr_0 = 2.8259e-04
Loss = 5.9301e-02, PNorm = 67.1686, GNorm = 0.7326, lr_0 = 2.8240e-04
Loss = 5.7122e-02, PNorm = 67.1726, GNorm = 0.5385, lr_0 = 2.8221e-04
Loss = 5.5847e-02, PNorm = 67.1765, GNorm = 0.4058, lr_0 = 2.8201e-04
Loss = 6.3830e-02, PNorm = 67.1824, GNorm = 0.4473, lr_0 = 2.8182e-04
Loss = 4.7056e-02, PNorm = 67.1889, GNorm = 0.6178, lr_0 = 2.8163e-04
Loss = 5.0697e-02, PNorm = 67.1961, GNorm = 0.4468, lr_0 = 2.8143e-04
Loss = 5.5889e-02, PNorm = 67.2005, GNorm = 0.4746, lr_0 = 2.8124e-04
Loss = 5.8212e-02, PNorm = 67.2065, GNorm = 0.4840, lr_0 = 2.8105e-04
Loss = 6.1430e-02, PNorm = 67.2137, GNorm = 0.7652, lr_0 = 2.8085e-04
Loss = 5.7650e-02, PNorm = 67.2218, GNorm = 0.6832, lr_0 = 2.8066e-04
Loss = 6.2193e-02, PNorm = 67.2281, GNorm = 0.4378, lr_0 = 2.8047e-04
Loss = 5.8073e-02, PNorm = 67.2337, GNorm = 0.3589, lr_0 = 2.8028e-04
Loss = 7.0013e-02, PNorm = 67.2371, GNorm = 0.6303, lr_0 = 2.8009e-04
Loss = 6.4126e-02, PNorm = 67.2392, GNorm = 0.6607, lr_0 = 2.7989e-04
Loss = 6.1583e-02, PNorm = 67.2433, GNorm = 0.6089, lr_0 = 2.7970e-04
Loss = 6.4557e-02, PNorm = 67.2514, GNorm = 0.6012, lr_0 = 2.7951e-04
Loss = 6.0868e-02, PNorm = 67.2579, GNorm = 0.8089, lr_0 = 2.7932e-04
Loss = 5.3648e-02, PNorm = 67.2634, GNorm = 0.5618, lr_0 = 2.7913e-04
Loss = 5.6774e-02, PNorm = 67.2705, GNorm = 0.6939, lr_0 = 2.7894e-04
Loss = 5.9291e-02, PNorm = 67.2748, GNorm = 0.9917, lr_0 = 2.7875e-04
Loss = 5.8147e-02, PNorm = 67.2778, GNorm = 0.6732, lr_0 = 2.7855e-04
Loss = 6.3816e-02, PNorm = 67.2808, GNorm = 0.6765, lr_0 = 2.7836e-04
Loss = 5.0109e-02, PNorm = 67.2862, GNorm = 0.7665, lr_0 = 2.7817e-04
Loss = 5.8246e-02, PNorm = 67.2949, GNorm = 0.4598, lr_0 = 2.7798e-04
Loss = 5.5657e-02, PNorm = 67.3038, GNorm = 0.4674, lr_0 = 2.7779e-04
Loss = 4.9596e-02, PNorm = 67.3095, GNorm = 0.3892, lr_0 = 2.7760e-04
Loss = 5.6278e-02, PNorm = 67.3137, GNorm = 0.5162, lr_0 = 2.7741e-04
Loss = 5.9861e-02, PNorm = 67.3189, GNorm = 0.7602, lr_0 = 2.7722e-04
Loss = 4.7971e-02, PNorm = 67.3223, GNorm = 0.4477, lr_0 = 2.7703e-04
Loss = 5.4141e-02, PNorm = 67.3237, GNorm = 0.6301, lr_0 = 2.7684e-04
Loss = 4.3324e-02, PNorm = 67.3285, GNorm = 0.4025, lr_0 = 2.7665e-04
Loss = 6.4062e-02, PNorm = 67.3301, GNorm = 0.4538, lr_0 = 2.7646e-04
Loss = 5.3904e-02, PNorm = 67.3324, GNorm = 0.3812, lr_0 = 2.7627e-04
Loss = 6.0466e-02, PNorm = 67.3375, GNorm = 0.5394, lr_0 = 2.7608e-04
Loss = 5.7148e-02, PNorm = 67.3434, GNorm = 0.6928, lr_0 = 2.7590e-04
Loss = 5.1760e-02, PNorm = 67.3469, GNorm = 0.5807, lr_0 = 2.7571e-04
Loss = 6.1771e-02, PNorm = 67.3508, GNorm = 0.5344, lr_0 = 2.7552e-04
Loss = 5.0945e-02, PNorm = 67.3543, GNorm = 0.4407, lr_0 = 2.7533e-04
Loss = 5.7153e-02, PNorm = 67.3585, GNorm = 0.8893, lr_0 = 2.7514e-04
Loss = 6.0371e-02, PNorm = 67.3630, GNorm = 0.7910, lr_0 = 2.7495e-04
Loss = 6.0421e-02, PNorm = 67.3686, GNorm = 0.7349, lr_0 = 2.7476e-04
Loss = 5.4774e-02, PNorm = 67.3741, GNorm = 0.5007, lr_0 = 2.7457e-04
Loss = 4.9001e-02, PNorm = 67.3803, GNorm = 0.6064, lr_0 = 2.7439e-04
Loss = 6.5531e-02, PNorm = 67.3857, GNorm = 0.8834, lr_0 = 2.7420e-04
Loss = 4.9062e-02, PNorm = 67.3895, GNorm = 0.3828, lr_0 = 2.7401e-04
Loss = 5.5836e-02, PNorm = 67.3928, GNorm = 0.5839, lr_0 = 2.7382e-04
Loss = 5.3348e-02, PNorm = 67.3948, GNorm = 0.4922, lr_0 = 2.7364e-04
Loss = 5.6866e-02, PNorm = 67.4003, GNorm = 0.5995, lr_0 = 2.7345e-04
Loss = 5.4574e-02, PNorm = 67.4087, GNorm = 0.7619, lr_0 = 2.7326e-04
Loss = 6.4132e-02, PNorm = 67.4159, GNorm = 0.7950, lr_0 = 2.7307e-04
Loss = 6.4426e-02, PNorm = 67.4229, GNorm = 0.4584, lr_0 = 2.7289e-04
Loss = 5.1483e-02, PNorm = 67.4290, GNorm = 0.5558, lr_0 = 2.7270e-04
Loss = 6.7467e-02, PNorm = 67.4359, GNorm = 0.5482, lr_0 = 2.7251e-04
Loss = 5.7383e-02, PNorm = 67.4399, GNorm = 0.5654, lr_0 = 2.7233e-04
Loss = 5.8925e-02, PNorm = 67.4438, GNorm = 0.7458, lr_0 = 2.7214e-04
Loss = 5.8346e-02, PNorm = 67.4464, GNorm = 0.5709, lr_0 = 2.7195e-04
Loss = 5.9702e-02, PNorm = 67.4511, GNorm = 0.7669, lr_0 = 2.7177e-04
Loss = 6.2986e-02, PNorm = 67.4573, GNorm = 0.5344, lr_0 = 2.7158e-04
Loss = 5.4415e-02, PNorm = 67.4620, GNorm = 0.6086, lr_0 = 2.7139e-04
Loss = 6.3424e-02, PNorm = 67.4664, GNorm = 0.5805, lr_0 = 2.7121e-04
Loss = 6.2087e-02, PNorm = 67.4712, GNorm = 0.6318, lr_0 = 2.7102e-04
Loss = 5.5769e-02, PNorm = 67.4776, GNorm = 0.5140, lr_0 = 2.7084e-04
Loss = 6.0601e-02, PNorm = 67.4823, GNorm = 0.5852, lr_0 = 2.7065e-04
Loss = 6.3506e-02, PNorm = 67.4870, GNorm = 0.7433, lr_0 = 2.7047e-04
Loss = 5.8814e-02, PNorm = 67.4911, GNorm = 0.5522, lr_0 = 2.7028e-04
Loss = 6.1142e-02, PNorm = 67.4982, GNorm = 0.5627, lr_0 = 2.7010e-04
Loss = 6.0520e-02, PNorm = 67.5034, GNorm = 0.4418, lr_0 = 2.6991e-04
Loss = 6.3104e-02, PNorm = 67.5076, GNorm = 0.5721, lr_0 = 2.6973e-04
Loss = 5.6644e-02, PNorm = 67.5114, GNorm = 0.4430, lr_0 = 2.6954e-04
Loss = 6.7869e-02, PNorm = 67.5157, GNorm = 0.8504, lr_0 = 2.6936e-04
Loss = 6.3675e-02, PNorm = 67.5204, GNorm = 0.5671, lr_0 = 2.6917e-04
Loss = 5.3934e-02, PNorm = 67.5267, GNorm = 0.5219, lr_0 = 2.6899e-04
Loss = 6.0064e-02, PNorm = 67.5301, GNorm = 0.5218, lr_0 = 2.6880e-04
Loss = 5.9282e-02, PNorm = 67.5350, GNorm = 0.6013, lr_0 = 2.6862e-04
Loss = 5.6237e-02, PNorm = 67.5385, GNorm = 0.9029, lr_0 = 2.6844e-04
Loss = 5.7546e-02, PNorm = 67.5419, GNorm = 0.5288, lr_0 = 2.6825e-04
Validation mae = 0.393773
Epoch 18
Loss = 5.4785e-02, PNorm = 67.5477, GNorm = 0.6020, lr_0 = 2.6807e-04
Loss = 6.3879e-02, PNorm = 67.5527, GNorm = 0.4973, lr_0 = 2.6788e-04
Loss = 5.1401e-02, PNorm = 67.5582, GNorm = 0.4476, lr_0 = 2.6770e-04
Loss = 4.8044e-02, PNorm = 67.5634, GNorm = 0.4179, lr_0 = 2.6752e-04
Loss = 4.8849e-02, PNorm = 67.5674, GNorm = 0.4831, lr_0 = 2.6733e-04
Loss = 5.1536e-02, PNorm = 67.5724, GNorm = 0.5480, lr_0 = 2.6715e-04
Loss = 4.4587e-02, PNorm = 67.5774, GNorm = 0.5182, lr_0 = 2.6697e-04
Loss = 5.0773e-02, PNorm = 67.5824, GNorm = 0.6068, lr_0 = 2.6678e-04
Loss = 5.4242e-02, PNorm = 67.5886, GNorm = 0.6424, lr_0 = 2.6660e-04
Loss = 6.7752e-02, PNorm = 67.5934, GNorm = 0.6144, lr_0 = 2.6642e-04
Loss = 4.7504e-02, PNorm = 67.5976, GNorm = 0.4857, lr_0 = 2.6624e-04
Loss = 5.2672e-02, PNorm = 67.6024, GNorm = 0.5270, lr_0 = 2.6605e-04
Loss = 5.0680e-02, PNorm = 67.6058, GNorm = 0.3559, lr_0 = 2.6587e-04
Loss = 5.0756e-02, PNorm = 67.6103, GNorm = 0.4815, lr_0 = 2.6569e-04
Loss = 4.8897e-02, PNorm = 67.6130, GNorm = 0.4776, lr_0 = 2.6551e-04
Loss = 6.3045e-02, PNorm = 67.6169, GNorm = 0.5812, lr_0 = 2.6533e-04
Loss = 5.5776e-02, PNorm = 67.6213, GNorm = 0.5826, lr_0 = 2.6514e-04
Loss = 4.5850e-02, PNorm = 67.6269, GNorm = 0.3888, lr_0 = 2.6496e-04
Loss = 4.7387e-02, PNorm = 67.6335, GNorm = 0.4281, lr_0 = 2.6478e-04
Loss = 4.5147e-02, PNorm = 67.6384, GNorm = 0.5867, lr_0 = 2.6460e-04
Loss = 4.2893e-02, PNorm = 67.6439, GNorm = 0.5086, lr_0 = 2.6442e-04
Loss = 4.8910e-02, PNorm = 67.6499, GNorm = 0.5713, lr_0 = 2.6424e-04
Loss = 5.2804e-02, PNorm = 67.6572, GNorm = 0.5794, lr_0 = 2.6406e-04
Loss = 5.6151e-02, PNorm = 67.6644, GNorm = 0.6292, lr_0 = 2.6388e-04
Loss = 4.6344e-02, PNorm = 67.6685, GNorm = 0.5749, lr_0 = 2.6369e-04
Loss = 5.1702e-02, PNorm = 67.6726, GNorm = 0.4119, lr_0 = 2.6351e-04
Loss = 4.6628e-02, PNorm = 67.6777, GNorm = 0.5777, lr_0 = 2.6333e-04
Loss = 4.9940e-02, PNorm = 67.6838, GNorm = 0.9852, lr_0 = 2.6315e-04
Loss = 6.0538e-02, PNorm = 67.6905, GNorm = 0.6498, lr_0 = 2.6297e-04
Loss = 5.9817e-02, PNorm = 67.6941, GNorm = 0.4310, lr_0 = 2.6279e-04
Loss = 6.0234e-02, PNorm = 67.6989, GNorm = 0.9493, lr_0 = 2.6261e-04
Loss = 6.2913e-02, PNorm = 67.7043, GNorm = 0.6378, lr_0 = 2.6243e-04
Loss = 4.8101e-02, PNorm = 67.7099, GNorm = 0.6766, lr_0 = 2.6225e-04
Loss = 5.2089e-02, PNorm = 67.7132, GNorm = 0.4662, lr_0 = 2.6207e-04
Loss = 5.4664e-02, PNorm = 67.7162, GNorm = 0.9028, lr_0 = 2.6189e-04
Loss = 5.2202e-02, PNorm = 67.7208, GNorm = 0.6989, lr_0 = 2.6171e-04
Loss = 5.0716e-02, PNorm = 67.7266, GNorm = 0.8108, lr_0 = 2.6153e-04
Loss = 6.0496e-02, PNorm = 67.7299, GNorm = 0.5384, lr_0 = 2.6136e-04
Loss = 5.0984e-02, PNorm = 67.7307, GNorm = 0.5050, lr_0 = 2.6118e-04
Loss = 5.5259e-02, PNorm = 67.7343, GNorm = 0.4645, lr_0 = 2.6100e-04
Loss = 6.3344e-02, PNorm = 67.7380, GNorm = 0.7214, lr_0 = 2.6082e-04
Loss = 5.0124e-02, PNorm = 67.7441, GNorm = 0.5897, lr_0 = 2.6064e-04
Loss = 4.9058e-02, PNorm = 67.7507, GNorm = 0.5020, lr_0 = 2.6046e-04
Loss = 4.9234e-02, PNorm = 67.7548, GNorm = 0.4814, lr_0 = 2.6028e-04
Loss = 5.1227e-02, PNorm = 67.7581, GNorm = 0.4242, lr_0 = 2.6011e-04
Loss = 4.9267e-02, PNorm = 67.7629, GNorm = 0.4218, lr_0 = 2.5993e-04
Loss = 5.2544e-02, PNorm = 67.7670, GNorm = 0.7893, lr_0 = 2.5975e-04
Loss = 5.2701e-02, PNorm = 67.7712, GNorm = 0.7539, lr_0 = 2.5957e-04
Loss = 5.2546e-02, PNorm = 67.7782, GNorm = 0.6158, lr_0 = 2.5939e-04
Loss = 4.6420e-02, PNorm = 67.7820, GNorm = 0.5646, lr_0 = 2.5922e-04
Loss = 5.5199e-02, PNorm = 67.7872, GNorm = 0.5605, lr_0 = 2.5904e-04
Loss = 5.2628e-02, PNorm = 67.7927, GNorm = 0.7077, lr_0 = 2.5886e-04
Loss = 4.7557e-02, PNorm = 67.7964, GNorm = 0.5558, lr_0 = 2.5868e-04
Loss = 5.5061e-02, PNorm = 67.8001, GNorm = 0.6082, lr_0 = 2.5851e-04
Loss = 5.6256e-02, PNorm = 67.8055, GNorm = 1.0664, lr_0 = 2.5833e-04
Loss = 5.4257e-02, PNorm = 67.8112, GNorm = 0.5147, lr_0 = 2.5815e-04
Loss = 6.1916e-02, PNorm = 67.8156, GNorm = 0.6561, lr_0 = 2.5797e-04
Loss = 4.9370e-02, PNorm = 67.8190, GNorm = 0.5384, lr_0 = 2.5780e-04
Loss = 5.8020e-02, PNorm = 67.8243, GNorm = 0.4060, lr_0 = 2.5762e-04
Loss = 5.3636e-02, PNorm = 67.8298, GNorm = 0.5771, lr_0 = 2.5745e-04
Loss = 5.9272e-02, PNorm = 67.8345, GNorm = 0.5950, lr_0 = 2.5727e-04
Loss = 4.8500e-02, PNorm = 67.8410, GNorm = 0.5014, lr_0 = 2.5709e-04
Loss = 5.6343e-02, PNorm = 67.8454, GNorm = 0.6820, lr_0 = 2.5692e-04
Loss = 4.7083e-02, PNorm = 67.8497, GNorm = 0.5861, lr_0 = 2.5674e-04
Loss = 5.5976e-02, PNorm = 67.8524, GNorm = 0.4880, lr_0 = 2.5656e-04
Loss = 5.7883e-02, PNorm = 67.8569, GNorm = 0.5076, lr_0 = 2.5639e-04
Loss = 5.0655e-02, PNorm = 67.8628, GNorm = 0.4857, lr_0 = 2.5621e-04
Loss = 5.8207e-02, PNorm = 67.8656, GNorm = 0.5601, lr_0 = 2.5604e-04
Loss = 5.4181e-02, PNorm = 67.8681, GNorm = 0.4389, lr_0 = 2.5586e-04
Loss = 5.2547e-02, PNorm = 67.8712, GNorm = 0.5480, lr_0 = 2.5569e-04
Loss = 4.7579e-02, PNorm = 67.8759, GNorm = 0.4039, lr_0 = 2.5551e-04
Loss = 5.8285e-02, PNorm = 67.8785, GNorm = 0.6740, lr_0 = 2.5534e-04
Loss = 5.5972e-02, PNorm = 67.8815, GNorm = 0.6100, lr_0 = 2.5516e-04
Loss = 5.3112e-02, PNorm = 67.8858, GNorm = 0.5558, lr_0 = 2.5499e-04
Loss = 5.3598e-02, PNorm = 67.8899, GNorm = 0.6359, lr_0 = 2.5481e-04
Loss = 6.0428e-02, PNorm = 67.8949, GNorm = 0.6682, lr_0 = 2.5464e-04
Loss = 5.4128e-02, PNorm = 67.9000, GNorm = 0.4938, lr_0 = 2.5446e-04
Loss = 6.4274e-02, PNorm = 67.9053, GNorm = 0.4251, lr_0 = 2.5429e-04
Loss = 5.2254e-02, PNorm = 67.9121, GNorm = 0.5081, lr_0 = 2.5411e-04
Loss = 5.4036e-02, PNorm = 67.9154, GNorm = 0.6959, lr_0 = 2.5394e-04
Loss = 5.0050e-02, PNorm = 67.9191, GNorm = 0.4285, lr_0 = 2.5377e-04
Loss = 6.6151e-02, PNorm = 67.9234, GNorm = 0.7060, lr_0 = 2.5359e-04
Loss = 5.6612e-02, PNorm = 67.9265, GNorm = 0.9612, lr_0 = 2.5342e-04
Loss = 5.1458e-02, PNorm = 67.9300, GNorm = 0.6887, lr_0 = 2.5325e-04
Loss = 6.2547e-02, PNorm = 67.9351, GNorm = 1.0225, lr_0 = 2.5307e-04
Loss = 5.2874e-02, PNorm = 67.9401, GNorm = 0.5059, lr_0 = 2.5290e-04
Loss = 5.9740e-02, PNorm = 67.9442, GNorm = 0.4163, lr_0 = 2.5273e-04
Loss = 5.6921e-02, PNorm = 67.9483, GNorm = 0.4451, lr_0 = 2.5255e-04
Loss = 5.6113e-02, PNorm = 67.9517, GNorm = 0.7242, lr_0 = 2.5238e-04
Loss = 5.7428e-02, PNorm = 67.9560, GNorm = 0.5128, lr_0 = 2.5221e-04
Loss = 5.3195e-02, PNorm = 67.9611, GNorm = 0.6004, lr_0 = 2.5203e-04
Loss = 5.2737e-02, PNorm = 67.9653, GNorm = 0.5935, lr_0 = 2.5186e-04
Loss = 5.4317e-02, PNorm = 67.9721, GNorm = 0.5936, lr_0 = 2.5169e-04
Loss = 5.2101e-02, PNorm = 67.9754, GNorm = 0.6067, lr_0 = 2.5152e-04
Loss = 4.5717e-02, PNorm = 67.9784, GNorm = 0.5161, lr_0 = 2.5134e-04
Loss = 5.5366e-02, PNorm = 67.9835, GNorm = 0.6104, lr_0 = 2.5117e-04
Loss = 6.1304e-02, PNorm = 67.9875, GNorm = 0.6264, lr_0 = 2.5100e-04
Loss = 5.3460e-02, PNorm = 67.9935, GNorm = 0.5628, lr_0 = 2.5083e-04
Loss = 5.1894e-02, PNorm = 67.9969, GNorm = 0.5518, lr_0 = 2.5066e-04
Loss = 5.4180e-02, PNorm = 68.0007, GNorm = 0.4151, lr_0 = 2.5048e-04
Loss = 5.3468e-02, PNorm = 68.0051, GNorm = 0.4816, lr_0 = 2.5031e-04
Loss = 5.8121e-02, PNorm = 68.0095, GNorm = 0.5871, lr_0 = 2.5014e-04
Loss = 5.7921e-02, PNorm = 68.0144, GNorm = 0.4852, lr_0 = 2.4997e-04
Loss = 5.2775e-02, PNorm = 68.0210, GNorm = 0.5478, lr_0 = 2.4980e-04
Loss = 5.7034e-02, PNorm = 68.0261, GNorm = 0.6647, lr_0 = 2.4963e-04
Loss = 6.8509e-02, PNorm = 68.0285, GNorm = 0.6700, lr_0 = 2.4946e-04
Loss = 5.4644e-02, PNorm = 68.0317, GNorm = 0.4203, lr_0 = 2.4929e-04
Loss = 6.0600e-02, PNorm = 68.0354, GNorm = 0.9440, lr_0 = 2.4911e-04
Loss = 5.4660e-02, PNorm = 68.0383, GNorm = 0.8454, lr_0 = 2.4894e-04
Loss = 5.9754e-02, PNorm = 68.0421, GNorm = 0.9364, lr_0 = 2.4877e-04
Loss = 6.1155e-02, PNorm = 68.0457, GNorm = 0.4088, lr_0 = 2.4860e-04
Loss = 6.8625e-02, PNorm = 68.0506, GNorm = 0.8954, lr_0 = 2.4843e-04
Loss = 4.9862e-02, PNorm = 68.0550, GNorm = 0.3910, lr_0 = 2.4826e-04
Loss = 6.3280e-02, PNorm = 68.0584, GNorm = 0.5609, lr_0 = 2.4809e-04
Loss = 5.0986e-02, PNorm = 68.0618, GNorm = 0.5419, lr_0 = 2.4792e-04
Loss = 5.3105e-02, PNorm = 68.0645, GNorm = 0.7264, lr_0 = 2.4775e-04
Loss = 5.0375e-02, PNorm = 68.0694, GNorm = 0.6294, lr_0 = 2.4758e-04
Loss = 5.2522e-02, PNorm = 68.0734, GNorm = 0.6129, lr_0 = 2.4741e-04
Loss = 6.1123e-02, PNorm = 68.0759, GNorm = 0.6279, lr_0 = 2.4724e-04
Loss = 5.1761e-02, PNorm = 68.0785, GNorm = 0.5217, lr_0 = 2.4707e-04
Validation mae = 0.392873
Epoch 19
Loss = 5.1506e-02, PNorm = 68.0822, GNorm = 0.7407, lr_0 = 2.4690e-04
Loss = 5.1550e-02, PNorm = 68.0886, GNorm = 0.4690, lr_0 = 2.4674e-04
Loss = 4.4843e-02, PNorm = 68.0925, GNorm = 0.4206, lr_0 = 2.4657e-04
Loss = 4.4211e-02, PNorm = 68.0957, GNorm = 0.4351, lr_0 = 2.4640e-04
Loss = 5.8273e-02, PNorm = 68.1020, GNorm = 0.5850, lr_0 = 2.4623e-04
Loss = 4.7131e-02, PNorm = 68.1072, GNorm = 0.3966, lr_0 = 2.4606e-04
Loss = 4.7136e-02, PNorm = 68.1119, GNorm = 0.4575, lr_0 = 2.4589e-04
Loss = 4.4603e-02, PNorm = 68.1156, GNorm = 0.5160, lr_0 = 2.4572e-04
Loss = 4.5793e-02, PNorm = 68.1219, GNorm = 0.5004, lr_0 = 2.4556e-04
Loss = 6.0482e-02, PNorm = 68.1260, GNorm = 0.6229, lr_0 = 2.4539e-04
Loss = 5.2479e-02, PNorm = 68.1311, GNorm = 0.6497, lr_0 = 2.4522e-04
Loss = 4.9439e-02, PNorm = 68.1353, GNorm = 0.5508, lr_0 = 2.4505e-04
Loss = 4.3307e-02, PNorm = 68.1394, GNorm = 0.3819, lr_0 = 2.4488e-04
Loss = 5.5482e-02, PNorm = 68.1422, GNorm = 0.5803, lr_0 = 2.4472e-04
Loss = 4.5294e-02, PNorm = 68.1467, GNorm = 0.7619, lr_0 = 2.4455e-04
Loss = 4.7202e-02, PNorm = 68.1513, GNorm = 0.5614, lr_0 = 2.4438e-04
Loss = 5.3196e-02, PNorm = 68.1551, GNorm = 0.5884, lr_0 = 2.4421e-04
Loss = 4.8760e-02, PNorm = 68.1621, GNorm = 0.5288, lr_0 = 2.4405e-04
Loss = 4.5868e-02, PNorm = 68.1656, GNorm = 0.6543, lr_0 = 2.4388e-04
Loss = 4.1812e-02, PNorm = 68.1678, GNorm = 0.5810, lr_0 = 2.4371e-04
Loss = 4.5203e-02, PNorm = 68.1716, GNorm = 0.7890, lr_0 = 2.4354e-04
Loss = 5.1352e-02, PNorm = 68.1758, GNorm = 0.6312, lr_0 = 2.4338e-04
Loss = 5.3707e-02, PNorm = 68.1782, GNorm = 0.4466, lr_0 = 2.4321e-04
Loss = 4.6207e-02, PNorm = 68.1831, GNorm = 0.4184, lr_0 = 2.4304e-04
Loss = 4.7716e-02, PNorm = 68.1881, GNorm = 0.7187, lr_0 = 2.4288e-04
Loss = 4.4175e-02, PNorm = 68.1942, GNorm = 0.6507, lr_0 = 2.4271e-04
Loss = 5.8553e-02, PNorm = 68.1975, GNorm = 0.4936, lr_0 = 2.4254e-04
Loss = 4.4696e-02, PNorm = 68.2008, GNorm = 0.3950, lr_0 = 2.4238e-04
Loss = 6.2771e-02, PNorm = 68.2075, GNorm = 0.6404, lr_0 = 2.4221e-04
Loss = 4.9513e-02, PNorm = 68.2131, GNorm = 0.6484, lr_0 = 2.4205e-04
Loss = 4.9177e-02, PNorm = 68.2163, GNorm = 0.5338, lr_0 = 2.4188e-04
Loss = 5.6176e-02, PNorm = 68.2215, GNorm = 0.4836, lr_0 = 2.4171e-04
Loss = 5.6807e-02, PNorm = 68.2275, GNorm = 0.6054, lr_0 = 2.4155e-04
Loss = 5.3682e-02, PNorm = 68.2325, GNorm = 0.8494, lr_0 = 2.4138e-04
Loss = 4.8665e-02, PNorm = 68.2393, GNorm = 0.4520, lr_0 = 2.4122e-04
Loss = 4.9247e-02, PNorm = 68.2444, GNorm = 0.4192, lr_0 = 2.4105e-04
Loss = 5.3429e-02, PNorm = 68.2473, GNorm = 0.5459, lr_0 = 2.4089e-04
Loss = 5.2243e-02, PNorm = 68.2525, GNorm = 0.4542, lr_0 = 2.4072e-04
Loss = 5.4636e-02, PNorm = 68.2565, GNorm = 0.6986, lr_0 = 2.4056e-04
Loss = 5.5072e-02, PNorm = 68.2602, GNorm = 0.5404, lr_0 = 2.4039e-04
Loss = 5.3830e-02, PNorm = 68.2627, GNorm = 0.7478, lr_0 = 2.4023e-04
Loss = 5.5884e-02, PNorm = 68.2690, GNorm = 0.4943, lr_0 = 2.4006e-04
Loss = 4.9943e-02, PNorm = 68.2752, GNorm = 0.5882, lr_0 = 2.3990e-04
Loss = 4.7903e-02, PNorm = 68.2774, GNorm = 0.5296, lr_0 = 2.3974e-04
Loss = 5.4185e-02, PNorm = 68.2797, GNorm = 0.3822, lr_0 = 2.3957e-04
Loss = 4.1652e-02, PNorm = 68.2843, GNorm = 0.4878, lr_0 = 2.3941e-04
Loss = 4.4616e-02, PNorm = 68.2890, GNorm = 0.5660, lr_0 = 2.3924e-04
Loss = 5.4944e-02, PNorm = 68.2931, GNorm = 0.6984, lr_0 = 2.3908e-04
Loss = 4.4593e-02, PNorm = 68.2969, GNorm = 0.5865, lr_0 = 2.3892e-04
Loss = 5.5936e-02, PNorm = 68.3005, GNorm = 0.5430, lr_0 = 2.3875e-04
Loss = 4.3125e-02, PNorm = 68.3047, GNorm = 0.4649, lr_0 = 2.3859e-04
Loss = 4.6158e-02, PNorm = 68.3086, GNorm = 0.4188, lr_0 = 2.3842e-04
Loss = 5.1528e-02, PNorm = 68.3109, GNorm = 0.4984, lr_0 = 2.3826e-04
Loss = 4.7435e-02, PNorm = 68.3137, GNorm = 0.5826, lr_0 = 2.3810e-04
Loss = 4.7973e-02, PNorm = 68.3170, GNorm = 0.6440, lr_0 = 2.3794e-04
Loss = 4.9817e-02, PNorm = 68.3220, GNorm = 0.4716, lr_0 = 2.3777e-04
Loss = 4.7129e-02, PNorm = 68.3266, GNorm = 0.5411, lr_0 = 2.3761e-04
Loss = 4.8071e-02, PNorm = 68.3308, GNorm = 0.6035, lr_0 = 2.3745e-04
Loss = 4.8937e-02, PNorm = 68.3347, GNorm = 0.5411, lr_0 = 2.3728e-04
Loss = 5.0573e-02, PNorm = 68.3388, GNorm = 0.5963, lr_0 = 2.3712e-04
Loss = 5.7492e-02, PNorm = 68.3421, GNorm = 0.6094, lr_0 = 2.3696e-04
Loss = 6.1217e-02, PNorm = 68.3473, GNorm = 0.5131, lr_0 = 2.3680e-04
Loss = 5.0253e-02, PNorm = 68.3515, GNorm = 0.4481, lr_0 = 2.3663e-04
Loss = 4.8737e-02, PNorm = 68.3563, GNorm = 0.5564, lr_0 = 2.3647e-04
Loss = 5.4037e-02, PNorm = 68.3607, GNorm = 0.4974, lr_0 = 2.3631e-04
Loss = 4.7083e-02, PNorm = 68.3661, GNorm = 0.4085, lr_0 = 2.3615e-04
Loss = 4.8026e-02, PNorm = 68.3715, GNorm = 0.4331, lr_0 = 2.3599e-04
Loss = 5.0624e-02, PNorm = 68.3738, GNorm = 0.5111, lr_0 = 2.3582e-04
Loss = 4.9706e-02, PNorm = 68.3767, GNorm = 0.5478, lr_0 = 2.3566e-04
Loss = 5.3897e-02, PNorm = 68.3803, GNorm = 0.4665, lr_0 = 2.3550e-04
Loss = 5.1121e-02, PNorm = 68.3868, GNorm = 0.4235, lr_0 = 2.3534e-04
Loss = 4.8038e-02, PNorm = 68.3909, GNorm = 0.5182, lr_0 = 2.3518e-04
Loss = 5.5397e-02, PNorm = 68.3937, GNorm = 0.5619, lr_0 = 2.3502e-04
Loss = 5.4050e-02, PNorm = 68.3995, GNorm = 0.4861, lr_0 = 2.3486e-04
Loss = 5.4394e-02, PNorm = 68.4021, GNorm = 0.4173, lr_0 = 2.3470e-04
Loss = 4.3631e-02, PNorm = 68.4046, GNorm = 0.4479, lr_0 = 2.3454e-04
Loss = 5.1763e-02, PNorm = 68.4095, GNorm = 0.6144, lr_0 = 2.3437e-04
Loss = 4.3173e-02, PNorm = 68.4125, GNorm = 0.6910, lr_0 = 2.3421e-04
Loss = 5.0226e-02, PNorm = 68.4158, GNorm = 0.6223, lr_0 = 2.3405e-04
Loss = 5.0545e-02, PNorm = 68.4198, GNorm = 0.6221, lr_0 = 2.3389e-04
Loss = 4.8447e-02, PNorm = 68.4240, GNorm = 0.6673, lr_0 = 2.3373e-04
Loss = 4.3589e-02, PNorm = 68.4274, GNorm = 0.4893, lr_0 = 2.3357e-04
Loss = 4.8189e-02, PNorm = 68.4308, GNorm = 1.0170, lr_0 = 2.3341e-04
Loss = 4.8090e-02, PNorm = 68.4319, GNorm = 0.5989, lr_0 = 2.3325e-04
Loss = 5.7664e-02, PNorm = 68.4340, GNorm = 0.5562, lr_0 = 2.3309e-04
Loss = 6.2808e-02, PNorm = 68.4381, GNorm = 0.7002, lr_0 = 2.3293e-04
Loss = 5.2094e-02, PNorm = 68.4433, GNorm = 0.7930, lr_0 = 2.3277e-04
Loss = 3.7551e-02, PNorm = 68.4482, GNorm = 0.7279, lr_0 = 2.3261e-04
Loss = 5.6031e-02, PNorm = 68.4507, GNorm = 0.5782, lr_0 = 2.3246e-04
Loss = 5.5729e-02, PNorm = 68.4573, GNorm = 0.7196, lr_0 = 2.3230e-04
Loss = 5.2053e-02, PNorm = 68.4610, GNorm = 0.5078, lr_0 = 2.3214e-04
Loss = 4.6558e-02, PNorm = 68.4633, GNorm = 0.3577, lr_0 = 2.3198e-04
Loss = 5.5500e-02, PNorm = 68.4656, GNorm = 0.7016, lr_0 = 2.3182e-04
Loss = 6.3688e-02, PNorm = 68.4685, GNorm = 0.5681, lr_0 = 2.3166e-04
Loss = 5.7254e-02, PNorm = 68.4719, GNorm = 0.5102, lr_0 = 2.3150e-04
Loss = 5.6129e-02, PNorm = 68.4777, GNorm = 0.6199, lr_0 = 2.3134e-04
Loss = 5.2620e-02, PNorm = 68.4830, GNorm = 0.4788, lr_0 = 2.3118e-04
Loss = 5.8580e-02, PNorm = 68.4883, GNorm = 0.7547, lr_0 = 2.3103e-04
Loss = 5.9436e-02, PNorm = 68.4948, GNorm = 0.3871, lr_0 = 2.3087e-04
Loss = 4.8913e-02, PNorm = 68.4989, GNorm = 1.0180, lr_0 = 2.3071e-04
Loss = 5.9103e-02, PNorm = 68.5024, GNorm = 0.4597, lr_0 = 2.3055e-04
Loss = 6.1332e-02, PNorm = 68.5082, GNorm = 0.6057, lr_0 = 2.3039e-04
Loss = 4.7266e-02, PNorm = 68.5149, GNorm = 0.3939, lr_0 = 2.3024e-04
Loss = 4.7909e-02, PNorm = 68.5183, GNorm = 0.9564, lr_0 = 2.3008e-04
Loss = 5.7954e-02, PNorm = 68.5221, GNorm = 0.6672, lr_0 = 2.2992e-04
Loss = 4.6089e-02, PNorm = 68.5262, GNorm = 0.3826, lr_0 = 2.2976e-04
Loss = 6.2111e-02, PNorm = 68.5298, GNorm = 0.6783, lr_0 = 2.2961e-04
Loss = 5.5389e-02, PNorm = 68.5332, GNorm = 0.5043, lr_0 = 2.2945e-04
Loss = 5.1279e-02, PNorm = 68.5357, GNorm = 0.6500, lr_0 = 2.2929e-04
Loss = 5.0791e-02, PNorm = 68.5374, GNorm = 0.5764, lr_0 = 2.2913e-04
Loss = 5.0825e-02, PNorm = 68.5408, GNorm = 0.6751, lr_0 = 2.2898e-04
Loss = 5.5995e-02, PNorm = 68.5450, GNorm = 0.7557, lr_0 = 2.2882e-04
Loss = 5.7396e-02, PNorm = 68.5473, GNorm = 0.6983, lr_0 = 2.2866e-04
Loss = 4.7618e-02, PNorm = 68.5504, GNorm = 0.5637, lr_0 = 2.2851e-04
Loss = 5.2344e-02, PNorm = 68.5532, GNorm = 0.6033, lr_0 = 2.2835e-04
Loss = 5.0971e-02, PNorm = 68.5538, GNorm = 0.5025, lr_0 = 2.2819e-04
Loss = 5.2222e-02, PNorm = 68.5555, GNorm = 0.4328, lr_0 = 2.2804e-04
Loss = 4.8698e-02, PNorm = 68.5610, GNorm = 0.3713, lr_0 = 2.2788e-04
Loss = 5.5178e-02, PNorm = 68.5667, GNorm = 0.9529, lr_0 = 2.2773e-04
Loss = 5.3883e-02, PNorm = 68.5707, GNorm = 0.6305, lr_0 = 2.2757e-04
Validation mae = 0.398066
Epoch 20
Loss = 5.3288e-02, PNorm = 68.5746, GNorm = 0.7080, lr_0 = 2.2741e-04
Loss = 4.0588e-02, PNorm = 68.5778, GNorm = 0.3542, lr_0 = 2.2726e-04
Loss = 4.6626e-02, PNorm = 68.5827, GNorm = 0.4671, lr_0 = 2.2710e-04
Loss = 4.8389e-02, PNorm = 68.5880, GNorm = 0.6394, lr_0 = 2.2695e-04
Loss = 4.7769e-02, PNorm = 68.5931, GNorm = 0.7950, lr_0 = 2.2679e-04
Loss = 5.0341e-02, PNorm = 68.5978, GNorm = 0.5169, lr_0 = 2.2664e-04
Loss = 4.7142e-02, PNorm = 68.6018, GNorm = 0.6096, lr_0 = 2.2648e-04
Loss = 4.1211e-02, PNorm = 68.6055, GNorm = 0.4240, lr_0 = 2.2632e-04
Loss = 4.5552e-02, PNorm = 68.6073, GNorm = 0.5284, lr_0 = 2.2617e-04
Loss = 5.0440e-02, PNorm = 68.6099, GNorm = 0.6293, lr_0 = 2.2601e-04
Loss = 4.7382e-02, PNorm = 68.6142, GNorm = 0.4322, lr_0 = 2.2586e-04
Loss = 4.6990e-02, PNorm = 68.6203, GNorm = 0.4952, lr_0 = 2.2571e-04
Loss = 4.1581e-02, PNorm = 68.6251, GNorm = 0.4901, lr_0 = 2.2555e-04
Loss = 4.0814e-02, PNorm = 68.6280, GNorm = 0.4080, lr_0 = 2.2540e-04
Loss = 4.2521e-02, PNorm = 68.6308, GNorm = 0.4031, lr_0 = 2.2524e-04
Loss = 4.3872e-02, PNorm = 68.6363, GNorm = 0.5952, lr_0 = 2.2509e-04
Loss = 4.1491e-02, PNorm = 68.6398, GNorm = 0.4461, lr_0 = 2.2493e-04
Loss = 4.8292e-02, PNorm = 68.6413, GNorm = 0.5203, lr_0 = 2.2478e-04
Loss = 4.5226e-02, PNorm = 68.6465, GNorm = 0.4949, lr_0 = 2.2463e-04
Loss = 4.8371e-02, PNorm = 68.6509, GNorm = 0.4214, lr_0 = 2.2447e-04
Loss = 5.6685e-02, PNorm = 68.6566, GNorm = 0.4585, lr_0 = 2.2432e-04
Loss = 5.2573e-02, PNorm = 68.6604, GNorm = 0.4756, lr_0 = 2.2416e-04
Loss = 3.8247e-02, PNorm = 68.6642, GNorm = 0.3459, lr_0 = 2.2401e-04
Loss = 3.7173e-02, PNorm = 68.6663, GNorm = 0.4318, lr_0 = 2.2386e-04
Loss = 5.0064e-02, PNorm = 68.6692, GNorm = 0.6055, lr_0 = 2.2370e-04
Loss = 3.6303e-02, PNorm = 68.6720, GNorm = 0.6121, lr_0 = 2.2355e-04
Loss = 5.0224e-02, PNorm = 68.6758, GNorm = 0.6267, lr_0 = 2.2340e-04
Loss = 4.8963e-02, PNorm = 68.6793, GNorm = 0.4896, lr_0 = 2.2324e-04
Loss = 4.3676e-02, PNorm = 68.6827, GNorm = 0.4691, lr_0 = 2.2309e-04
Loss = 4.9316e-02, PNorm = 68.6869, GNorm = 0.7957, lr_0 = 2.2294e-04
Loss = 4.6838e-02, PNorm = 68.6913, GNorm = 0.5099, lr_0 = 2.2279e-04
Loss = 4.6408e-02, PNorm = 68.6967, GNorm = 0.3903, lr_0 = 2.2263e-04
Loss = 4.7540e-02, PNorm = 68.7020, GNorm = 0.6585, lr_0 = 2.2248e-04
Loss = 4.5672e-02, PNorm = 68.7065, GNorm = 0.6467, lr_0 = 2.2233e-04
Loss = 4.9723e-02, PNorm = 68.7117, GNorm = 0.4063, lr_0 = 2.2218e-04
Loss = 4.1413e-02, PNorm = 68.7150, GNorm = 0.5476, lr_0 = 2.2202e-04
Loss = 3.9376e-02, PNorm = 68.7178, GNorm = 0.3894, lr_0 = 2.2187e-04
Loss = 4.7348e-02, PNorm = 68.7199, GNorm = 0.4046, lr_0 = 2.2172e-04
Loss = 4.6199e-02, PNorm = 68.7233, GNorm = 0.5142, lr_0 = 2.2157e-04
Loss = 5.4758e-02, PNorm = 68.7261, GNorm = 0.8228, lr_0 = 2.2142e-04
Loss = 4.0865e-02, PNorm = 68.7297, GNorm = 0.4747, lr_0 = 2.2126e-04
Loss = 4.1779e-02, PNorm = 68.7331, GNorm = 0.4679, lr_0 = 2.2111e-04
Loss = 4.4086e-02, PNorm = 68.7370, GNorm = 0.4597, lr_0 = 2.2096e-04
Loss = 5.4373e-02, PNorm = 68.7411, GNorm = 0.8582, lr_0 = 2.2081e-04
Loss = 3.7452e-02, PNorm = 68.7463, GNorm = 0.3414, lr_0 = 2.2066e-04
Loss = 5.0730e-02, PNorm = 68.7518, GNorm = 0.4856, lr_0 = 2.2051e-04
Loss = 5.3115e-02, PNorm = 68.7545, GNorm = 0.5372, lr_0 = 2.2036e-04
Loss = 4.2733e-02, PNorm = 68.7563, GNorm = 0.5052, lr_0 = 2.2021e-04
Loss = 4.9957e-02, PNorm = 68.7610, GNorm = 0.5183, lr_0 = 2.2005e-04
Loss = 4.9841e-02, PNorm = 68.7657, GNorm = 0.6348, lr_0 = 2.1990e-04
Loss = 4.6597e-02, PNorm = 68.7693, GNorm = 0.5010, lr_0 = 2.1975e-04
Loss = 5.4397e-02, PNorm = 68.7722, GNorm = 0.6318, lr_0 = 2.1960e-04
Loss = 4.6664e-02, PNorm = 68.7766, GNorm = 0.4082, lr_0 = 2.1945e-04
Loss = 4.9305e-02, PNorm = 68.7822, GNorm = 0.4877, lr_0 = 2.1930e-04
Loss = 5.1909e-02, PNorm = 68.7853, GNorm = 0.6218, lr_0 = 2.1915e-04
Loss = 5.1885e-02, PNorm = 68.7885, GNorm = 0.4806, lr_0 = 2.1900e-04
Loss = 4.6021e-02, PNorm = 68.7908, GNorm = 0.5873, lr_0 = 2.1885e-04
Loss = 4.7108e-02, PNorm = 68.7929, GNorm = 0.5905, lr_0 = 2.1870e-04
Loss = 4.4909e-02, PNorm = 68.7988, GNorm = 0.6336, lr_0 = 2.1855e-04
Loss = 5.1456e-02, PNorm = 68.8041, GNorm = 0.4343, lr_0 = 2.1840e-04
Loss = 4.8392e-02, PNorm = 68.8083, GNorm = 0.7038, lr_0 = 2.1825e-04
Loss = 5.0311e-02, PNorm = 68.8100, GNorm = 0.4711, lr_0 = 2.1810e-04
Loss = 5.3118e-02, PNorm = 68.8129, GNorm = 0.5854, lr_0 = 2.1795e-04
Loss = 4.8268e-02, PNorm = 68.8170, GNorm = 0.4672, lr_0 = 2.1780e-04
Loss = 4.7075e-02, PNorm = 68.8203, GNorm = 0.6399, lr_0 = 2.1765e-04
Loss = 5.7431e-02, PNorm = 68.8242, GNorm = 0.6420, lr_0 = 2.1751e-04
Loss = 4.4418e-02, PNorm = 68.8265, GNorm = 0.5522, lr_0 = 2.1736e-04
Loss = 4.2256e-02, PNorm = 68.8293, GNorm = 0.4315, lr_0 = 2.1721e-04
Loss = 4.4168e-02, PNorm = 68.8331, GNorm = 0.5475, lr_0 = 2.1706e-04
Loss = 4.6902e-02, PNorm = 68.8347, GNorm = 0.6232, lr_0 = 2.1691e-04
Loss = 5.7004e-02, PNorm = 68.8371, GNorm = 0.4743, lr_0 = 2.1676e-04
Loss = 4.5644e-02, PNorm = 68.8394, GNorm = 0.6077, lr_0 = 2.1661e-04
Loss = 5.9402e-02, PNorm = 68.8417, GNorm = 0.6016, lr_0 = 2.1646e-04
Loss = 4.7113e-02, PNorm = 68.8460, GNorm = 0.4690, lr_0 = 2.1632e-04
Loss = 4.6080e-02, PNorm = 68.8493, GNorm = 0.5761, lr_0 = 2.1617e-04
Loss = 5.4255e-02, PNorm = 68.8511, GNorm = 0.6034, lr_0 = 2.1602e-04
Loss = 4.7540e-02, PNorm = 68.8559, GNorm = 0.4181, lr_0 = 2.1587e-04
Loss = 5.1835e-02, PNorm = 68.8614, GNorm = 0.8592, lr_0 = 2.1572e-04
Loss = 4.7292e-02, PNorm = 68.8637, GNorm = 0.4414, lr_0 = 2.1558e-04
Loss = 5.0309e-02, PNorm = 68.8663, GNorm = 0.5507, lr_0 = 2.1543e-04
Loss = 4.4584e-02, PNorm = 68.8688, GNorm = 0.6867, lr_0 = 2.1528e-04
Loss = 5.1641e-02, PNorm = 68.8740, GNorm = 0.4854, lr_0 = 2.1513e-04
Loss = 4.4342e-02, PNorm = 68.8791, GNorm = 0.5961, lr_0 = 2.1499e-04
Loss = 4.5212e-02, PNorm = 68.8827, GNorm = 0.6576, lr_0 = 2.1484e-04
Loss = 4.0981e-02, PNorm = 68.8863, GNorm = 0.5950, lr_0 = 2.1469e-04
Loss = 4.1839e-02, PNorm = 68.8905, GNorm = 0.4578, lr_0 = 2.1454e-04
Loss = 5.5252e-02, PNorm = 68.8950, GNorm = 0.5287, lr_0 = 2.1440e-04
Loss = 4.6844e-02, PNorm = 68.8986, GNorm = 0.5345, lr_0 = 2.1425e-04
Loss = 5.4808e-02, PNorm = 68.9022, GNorm = 0.5938, lr_0 = 2.1410e-04
Loss = 4.2591e-02, PNorm = 68.9064, GNorm = 0.7057, lr_0 = 2.1396e-04
Loss = 4.8733e-02, PNorm = 68.9103, GNorm = 0.5003, lr_0 = 2.1381e-04
Loss = 4.9748e-02, PNorm = 68.9148, GNorm = 0.5148, lr_0 = 2.1366e-04
Loss = 5.0355e-02, PNorm = 68.9196, GNorm = 0.5187, lr_0 = 2.1352e-04
Loss = 4.5061e-02, PNorm = 68.9238, GNorm = 0.4829, lr_0 = 2.1337e-04
Loss = 5.5997e-02, PNorm = 68.9273, GNorm = 0.7147, lr_0 = 2.1323e-04
Loss = 5.4592e-02, PNorm = 68.9302, GNorm = 0.5910, lr_0 = 2.1308e-04
Loss = 5.5829e-02, PNorm = 68.9345, GNorm = 0.6998, lr_0 = 2.1293e-04
Loss = 4.3513e-02, PNorm = 68.9389, GNorm = 1.0226, lr_0 = 2.1279e-04
Loss = 4.3393e-02, PNorm = 68.9418, GNorm = 0.4288, lr_0 = 2.1264e-04
Loss = 5.0809e-02, PNorm = 68.9441, GNorm = 0.9573, lr_0 = 2.1250e-04
Loss = 6.4129e-02, PNorm = 68.9468, GNorm = 0.5695, lr_0 = 2.1235e-04
Loss = 4.7576e-02, PNorm = 68.9516, GNorm = 0.5555, lr_0 = 2.1221e-04
Loss = 4.6560e-02, PNorm = 68.9557, GNorm = 0.6526, lr_0 = 2.1206e-04
Loss = 4.8235e-02, PNorm = 68.9578, GNorm = 0.5330, lr_0 = 2.1191e-04
Loss = 5.1164e-02, PNorm = 68.9589, GNorm = 0.5042, lr_0 = 2.1177e-04
Loss = 5.2545e-02, PNorm = 68.9613, GNorm = 0.5934, lr_0 = 2.1162e-04
Loss = 4.7203e-02, PNorm = 68.9637, GNorm = 0.5611, lr_0 = 2.1148e-04
Loss = 4.4838e-02, PNorm = 68.9669, GNorm = 0.3998, lr_0 = 2.1133e-04
Loss = 4.9242e-02, PNorm = 68.9696, GNorm = 0.6467, lr_0 = 2.1119e-04
Loss = 5.0445e-02, PNorm = 68.9705, GNorm = 0.7044, lr_0 = 2.1104e-04
Loss = 5.2241e-02, PNorm = 68.9731, GNorm = 0.4413, lr_0 = 2.1090e-04
Loss = 4.6870e-02, PNorm = 68.9746, GNorm = 0.5266, lr_0 = 2.1076e-04
Loss = 4.4477e-02, PNorm = 68.9766, GNorm = 0.8842, lr_0 = 2.1061e-04
Loss = 4.7508e-02, PNorm = 68.9802, GNorm = 0.7521, lr_0 = 2.1047e-04
Loss = 5.2285e-02, PNorm = 68.9830, GNorm = 0.4632, lr_0 = 2.1032e-04
Loss = 5.0844e-02, PNorm = 68.9860, GNorm = 0.4039, lr_0 = 2.1018e-04
Loss = 5.1680e-02, PNorm = 68.9884, GNorm = 0.6866, lr_0 = 2.1003e-04
Loss = 4.7990e-02, PNorm = 68.9918, GNorm = 0.4573, lr_0 = 2.0989e-04
Loss = 4.6806e-02, PNorm = 68.9958, GNorm = 1.0418, lr_0 = 2.0975e-04
Loss = 4.6981e-02, PNorm = 68.9977, GNorm = 0.5880, lr_0 = 2.0960e-04
Validation mae = 0.384121
Epoch 21
Loss = 3.9473e-02, PNorm = 69.0017, GNorm = 0.3131, lr_0 = 2.0946e-04
Loss = 4.0392e-02, PNorm = 69.0053, GNorm = 0.3626, lr_0 = 2.0932e-04
Loss = 4.2930e-02, PNorm = 69.0083, GNorm = 0.5039, lr_0 = 2.0917e-04
Loss = 4.2977e-02, PNorm = 69.0113, GNorm = 0.4481, lr_0 = 2.0903e-04
Loss = 3.6059e-02, PNorm = 69.0120, GNorm = 0.2958, lr_0 = 2.0889e-04
Loss = 3.8145e-02, PNorm = 69.0149, GNorm = 0.5188, lr_0 = 2.0874e-04
Loss = 4.5990e-02, PNorm = 69.0184, GNorm = 0.4110, lr_0 = 2.0860e-04
Loss = 3.9357e-02, PNorm = 69.0233, GNorm = 0.5954, lr_0 = 2.0846e-04
Loss = 4.2546e-02, PNorm = 69.0274, GNorm = 0.4552, lr_0 = 2.0831e-04
Loss = 4.2793e-02, PNorm = 69.0303, GNorm = 0.4406, lr_0 = 2.0817e-04
Loss = 4.3102e-02, PNorm = 69.0337, GNorm = 0.6125, lr_0 = 2.0803e-04
Loss = 3.8448e-02, PNorm = 69.0380, GNorm = 0.5581, lr_0 = 2.0789e-04
Loss = 4.3329e-02, PNorm = 69.0425, GNorm = 0.5757, lr_0 = 2.0774e-04
Loss = 4.0810e-02, PNorm = 69.0475, GNorm = 0.3863, lr_0 = 2.0760e-04
Loss = 4.3032e-02, PNorm = 69.0520, GNorm = 0.5603, lr_0 = 2.0746e-04
Loss = 4.0830e-02, PNorm = 69.0565, GNorm = 0.5518, lr_0 = 2.0732e-04
Loss = 4.4933e-02, PNorm = 69.0598, GNorm = 0.6130, lr_0 = 2.0718e-04
Loss = 4.3476e-02, PNorm = 69.0631, GNorm = 0.5175, lr_0 = 2.0703e-04
Loss = 4.5101e-02, PNorm = 69.0674, GNorm = 0.6986, lr_0 = 2.0689e-04
Loss = 4.5522e-02, PNorm = 69.0699, GNorm = 0.5980, lr_0 = 2.0675e-04
Loss = 4.5766e-02, PNorm = 69.0748, GNorm = 0.4971, lr_0 = 2.0661e-04
Loss = 4.0207e-02, PNorm = 69.0796, GNorm = 0.5638, lr_0 = 2.0647e-04
Loss = 4.4659e-02, PNorm = 69.0816, GNorm = 0.3862, lr_0 = 2.0633e-04
Loss = 4.4522e-02, PNorm = 69.0847, GNorm = 0.4896, lr_0 = 2.0618e-04
Loss = 5.0298e-02, PNorm = 69.0884, GNorm = 0.4807, lr_0 = 2.0604e-04
Loss = 4.1702e-02, PNorm = 69.0912, GNorm = 0.3911, lr_0 = 2.0590e-04
Loss = 4.2528e-02, PNorm = 69.0942, GNorm = 0.4111, lr_0 = 2.0576e-04
Loss = 4.3731e-02, PNorm = 69.0970, GNorm = 0.3554, lr_0 = 2.0562e-04
Loss = 4.0616e-02, PNorm = 69.1006, GNorm = 0.6018, lr_0 = 2.0548e-04
Loss = 5.1867e-02, PNorm = 69.1055, GNorm = 0.5177, lr_0 = 2.0534e-04
Loss = 4.4978e-02, PNorm = 69.1121, GNorm = 0.3714, lr_0 = 2.0520e-04
Loss = 4.6465e-02, PNorm = 69.1166, GNorm = 0.6218, lr_0 = 2.0506e-04
Loss = 4.5026e-02, PNorm = 69.1188, GNorm = 0.6133, lr_0 = 2.0492e-04
Loss = 4.3173e-02, PNorm = 69.1206, GNorm = 0.5416, lr_0 = 2.0478e-04
Loss = 4.5126e-02, PNorm = 69.1237, GNorm = 0.7170, lr_0 = 2.0464e-04
Loss = 3.8532e-02, PNorm = 69.1257, GNorm = 0.4920, lr_0 = 2.0450e-04
Loss = 4.5863e-02, PNorm = 69.1289, GNorm = 0.6992, lr_0 = 2.0436e-04
Loss = 4.9514e-02, PNorm = 69.1343, GNorm = 0.8384, lr_0 = 2.0422e-04
Loss = 4.6376e-02, PNorm = 69.1388, GNorm = 0.4959, lr_0 = 2.0408e-04
Loss = 4.4396e-02, PNorm = 69.1427, GNorm = 0.3190, lr_0 = 2.0394e-04
Loss = 3.9197e-02, PNorm = 69.1455, GNorm = 0.5540, lr_0 = 2.0380e-04
Loss = 4.1124e-02, PNorm = 69.1498, GNorm = 0.4642, lr_0 = 2.0366e-04
Loss = 4.9143e-02, PNorm = 69.1537, GNorm = 0.5696, lr_0 = 2.0352e-04
Loss = 5.2386e-02, PNorm = 69.1579, GNorm = 0.6797, lr_0 = 2.0338e-04
Loss = 3.8306e-02, PNorm = 69.1641, GNorm = 0.4495, lr_0 = 2.0324e-04
Loss = 4.0473e-02, PNorm = 69.1688, GNorm = 0.4408, lr_0 = 2.0310e-04
Loss = 4.8624e-02, PNorm = 69.1721, GNorm = 0.6458, lr_0 = 2.0296e-04
Loss = 4.2500e-02, PNorm = 69.1748, GNorm = 0.3644, lr_0 = 2.0282e-04
Loss = 4.6386e-02, PNorm = 69.1787, GNorm = 1.1055, lr_0 = 2.0268e-04
Loss = 4.2164e-02, PNorm = 69.1824, GNorm = 0.4527, lr_0 = 2.0254e-04
Loss = 4.3587e-02, PNorm = 69.1857, GNorm = 0.4835, lr_0 = 2.0240e-04
Loss = 4.7553e-02, PNorm = 69.1877, GNorm = 0.6530, lr_0 = 2.0227e-04
Loss = 4.6901e-02, PNorm = 69.1903, GNorm = 0.5862, lr_0 = 2.0213e-04
Loss = 4.1187e-02, PNorm = 69.1941, GNorm = 0.4875, lr_0 = 2.0199e-04
Loss = 3.8371e-02, PNorm = 69.1967, GNorm = 0.4041, lr_0 = 2.0185e-04
Loss = 4.2158e-02, PNorm = 69.1997, GNorm = 0.7829, lr_0 = 2.0171e-04
Loss = 4.4654e-02, PNorm = 69.2031, GNorm = 0.6905, lr_0 = 2.0157e-04
Loss = 4.3517e-02, PNorm = 69.2065, GNorm = 0.6117, lr_0 = 2.0144e-04
Loss = 4.5204e-02, PNorm = 69.2100, GNorm = 0.4717, lr_0 = 2.0130e-04
Loss = 3.7127e-02, PNorm = 69.2119, GNorm = 0.6842, lr_0 = 2.0116e-04
Loss = 5.3762e-02, PNorm = 69.2163, GNorm = 0.8238, lr_0 = 2.0102e-04
Loss = 4.9169e-02, PNorm = 69.2205, GNorm = 0.7535, lr_0 = 2.0088e-04
Loss = 4.2591e-02, PNorm = 69.2240, GNorm = 0.5144, lr_0 = 2.0075e-04
Loss = 4.3220e-02, PNorm = 69.2266, GNorm = 0.6192, lr_0 = 2.0061e-04
Loss = 5.5683e-02, PNorm = 69.2281, GNorm = 0.8599, lr_0 = 2.0047e-04
Loss = 4.2823e-02, PNorm = 69.2316, GNorm = 0.6058, lr_0 = 2.0033e-04
Loss = 4.1693e-02, PNorm = 69.2347, GNorm = 0.3828, lr_0 = 2.0020e-04
Loss = 5.0178e-02, PNorm = 69.2386, GNorm = 0.8274, lr_0 = 2.0006e-04
Loss = 4.4027e-02, PNorm = 69.2422, GNorm = 0.5174, lr_0 = 1.9992e-04
Loss = 4.9606e-02, PNorm = 69.2455, GNorm = 0.5660, lr_0 = 1.9979e-04
Loss = 4.6235e-02, PNorm = 69.2477, GNorm = 0.4694, lr_0 = 1.9965e-04
Loss = 4.2539e-02, PNorm = 69.2505, GNorm = 0.4550, lr_0 = 1.9951e-04
Loss = 5.1677e-02, PNorm = 69.2553, GNorm = 0.5907, lr_0 = 1.9938e-04
Loss = 4.7700e-02, PNorm = 69.2584, GNorm = 0.4899, lr_0 = 1.9924e-04
Loss = 4.5411e-02, PNorm = 69.2609, GNorm = 0.4358, lr_0 = 1.9910e-04
Loss = 4.8181e-02, PNorm = 69.2639, GNorm = 0.5198, lr_0 = 1.9897e-04
Loss = 4.3891e-02, PNorm = 69.2672, GNorm = 0.5394, lr_0 = 1.9883e-04
Loss = 5.3566e-02, PNorm = 69.2701, GNorm = 0.4012, lr_0 = 1.9869e-04
Loss = 5.0919e-02, PNorm = 69.2727, GNorm = 0.4897, lr_0 = 1.9856e-04
Loss = 4.7541e-02, PNorm = 69.2755, GNorm = 0.7244, lr_0 = 1.9842e-04
Loss = 5.3345e-02, PNorm = 69.2784, GNorm = 0.3606, lr_0 = 1.9829e-04
Loss = 4.5009e-02, PNorm = 69.2815, GNorm = 0.5342, lr_0 = 1.9815e-04
Loss = 4.3650e-02, PNorm = 69.2835, GNorm = 0.6293, lr_0 = 1.9801e-04
Loss = 4.4202e-02, PNorm = 69.2852, GNorm = 0.4611, lr_0 = 1.9788e-04
Loss = 4.9346e-02, PNorm = 69.2884, GNorm = 0.7128, lr_0 = 1.9774e-04
Loss = 5.4103e-02, PNorm = 69.2921, GNorm = 0.6275, lr_0 = 1.9761e-04
Loss = 4.8725e-02, PNorm = 69.2942, GNorm = 0.4007, lr_0 = 1.9747e-04
Loss = 5.2519e-02, PNorm = 69.2972, GNorm = 0.6334, lr_0 = 1.9734e-04
Loss = 4.2129e-02, PNorm = 69.3012, GNorm = 0.6442, lr_0 = 1.9720e-04
Loss = 4.3093e-02, PNorm = 69.3036, GNorm = 0.3361, lr_0 = 1.9707e-04
Loss = 5.3742e-02, PNorm = 69.3075, GNorm = 0.6187, lr_0 = 1.9693e-04
Loss = 3.9546e-02, PNorm = 69.3123, GNorm = 0.6312, lr_0 = 1.9680e-04
Loss = 3.9368e-02, PNorm = 69.3145, GNorm = 0.5280, lr_0 = 1.9666e-04
Loss = 4.7489e-02, PNorm = 69.3180, GNorm = 0.6410, lr_0 = 1.9653e-04
Loss = 4.5708e-02, PNorm = 69.3219, GNorm = 0.4780, lr_0 = 1.9639e-04
Loss = 3.6379e-02, PNorm = 69.3248, GNorm = 0.8000, lr_0 = 1.9626e-04
Loss = 4.9286e-02, PNorm = 69.3278, GNorm = 0.5088, lr_0 = 1.9612e-04
Loss = 4.6988e-02, PNorm = 69.3312, GNorm = 0.3654, lr_0 = 1.9599e-04
Loss = 4.2261e-02, PNorm = 69.3337, GNorm = 0.5900, lr_0 = 1.9585e-04
Loss = 4.7916e-02, PNorm = 69.3354, GNorm = 0.5609, lr_0 = 1.9572e-04
Loss = 4.5906e-02, PNorm = 69.3367, GNorm = 0.4423, lr_0 = 1.9559e-04
Loss = 5.4812e-02, PNorm = 69.3415, GNorm = 0.5087, lr_0 = 1.9545e-04
Loss = 4.8724e-02, PNorm = 69.3472, GNorm = 0.7433, lr_0 = 1.9532e-04
Loss = 4.4433e-02, PNorm = 69.3527, GNorm = 0.4612, lr_0 = 1.9518e-04
Loss = 4.4213e-02, PNorm = 69.3572, GNorm = 0.5186, lr_0 = 1.9505e-04
Loss = 5.4939e-02, PNorm = 69.3596, GNorm = 0.3778, lr_0 = 1.9492e-04
Loss = 4.9199e-02, PNorm = 69.3635, GNorm = 0.4442, lr_0 = 1.9478e-04
Loss = 3.6776e-02, PNorm = 69.3657, GNorm = 0.4572, lr_0 = 1.9465e-04
Loss = 4.8870e-02, PNorm = 69.3684, GNorm = 0.4376, lr_0 = 1.9452e-04
Loss = 5.0593e-02, PNorm = 69.3709, GNorm = 0.4898, lr_0 = 1.9438e-04
Loss = 4.6830e-02, PNorm = 69.3725, GNorm = 0.5447, lr_0 = 1.9425e-04
Loss = 5.1506e-02, PNorm = 69.3729, GNorm = 0.5768, lr_0 = 1.9412e-04
Loss = 4.8864e-02, PNorm = 69.3750, GNorm = 0.5796, lr_0 = 1.9398e-04
Loss = 5.0319e-02, PNorm = 69.3793, GNorm = 0.8160, lr_0 = 1.9385e-04
Loss = 5.0549e-02, PNorm = 69.3816, GNorm = 0.6888, lr_0 = 1.9372e-04
Loss = 4.6146e-02, PNorm = 69.3822, GNorm = 0.6452, lr_0 = 1.9359e-04
Loss = 4.5047e-02, PNorm = 69.3827, GNorm = 0.4514, lr_0 = 1.9345e-04
Loss = 4.5078e-02, PNorm = 69.3855, GNorm = 0.4954, lr_0 = 1.9332e-04
Loss = 4.8118e-02, PNorm = 69.3893, GNorm = 0.5709, lr_0 = 1.9319e-04
Loss = 4.2255e-02, PNorm = 69.3927, GNorm = 0.5957, lr_0 = 1.9306e-04
Validation mae = 0.386638
Epoch 22
Loss = 4.6804e-02, PNorm = 69.3965, GNorm = 0.2861, lr_0 = 1.9292e-04
Loss = 4.0831e-02, PNorm = 69.3992, GNorm = 0.4273, lr_0 = 1.9279e-04
Loss = 3.9341e-02, PNorm = 69.4021, GNorm = 0.3966, lr_0 = 1.9266e-04
Loss = 3.9095e-02, PNorm = 69.4058, GNorm = 0.6165, lr_0 = 1.9253e-04
Loss = 3.4656e-02, PNorm = 69.4108, GNorm = 0.5465, lr_0 = 1.9240e-04
Loss = 3.6903e-02, PNorm = 69.4133, GNorm = 0.3874, lr_0 = 1.9226e-04
Loss = 4.2083e-02, PNorm = 69.4126, GNorm = 0.4544, lr_0 = 1.9213e-04
Loss = 4.0456e-02, PNorm = 69.4137, GNorm = 0.5559, lr_0 = 1.9200e-04
Loss = 5.0384e-02, PNorm = 69.4160, GNorm = 1.2818, lr_0 = 1.9187e-04
Loss = 4.3215e-02, PNorm = 69.4196, GNorm = 0.5355, lr_0 = 1.9174e-04
Loss = 4.1490e-02, PNorm = 69.4235, GNorm = 0.4371, lr_0 = 1.9161e-04
Loss = 3.6703e-02, PNorm = 69.4282, GNorm = 0.4053, lr_0 = 1.9148e-04
Loss = 4.4846e-02, PNorm = 69.4341, GNorm = 0.3846, lr_0 = 1.9134e-04
Loss = 4.2168e-02, PNorm = 69.4400, GNorm = 0.7465, lr_0 = 1.9121e-04
Loss = 4.1533e-02, PNorm = 69.4425, GNorm = 0.4233, lr_0 = 1.9108e-04
Loss = 4.4429e-02, PNorm = 69.4464, GNorm = 0.4578, lr_0 = 1.9095e-04
Loss = 4.6453e-02, PNorm = 69.4488, GNorm = 0.5627, lr_0 = 1.9082e-04
Loss = 5.3060e-02, PNorm = 69.4524, GNorm = 0.4559, lr_0 = 1.9069e-04
Loss = 4.3472e-02, PNorm = 69.4563, GNorm = 0.6126, lr_0 = 1.9056e-04
Loss = 3.8503e-02, PNorm = 69.4596, GNorm = 0.6084, lr_0 = 1.9043e-04
Loss = 3.8404e-02, PNorm = 69.4619, GNorm = 0.6483, lr_0 = 1.9030e-04
Loss = 4.2634e-02, PNorm = 69.4640, GNorm = 0.3925, lr_0 = 1.9017e-04
Loss = 3.7375e-02, PNorm = 69.4653, GNorm = 0.6493, lr_0 = 1.9004e-04
Loss = 4.2618e-02, PNorm = 69.4679, GNorm = 0.8829, lr_0 = 1.8991e-04
Loss = 4.6497e-02, PNorm = 69.4710, GNorm = 0.3920, lr_0 = 1.8978e-04
Loss = 4.3016e-02, PNorm = 69.4751, GNorm = 0.5444, lr_0 = 1.8965e-04
Loss = 4.4240e-02, PNorm = 69.4775, GNorm = 0.6234, lr_0 = 1.8952e-04
Loss = 3.2318e-02, PNorm = 69.4777, GNorm = 0.5864, lr_0 = 1.8939e-04
Loss = 5.3462e-02, PNorm = 69.4804, GNorm = 0.5140, lr_0 = 1.8926e-04
Loss = 4.5135e-02, PNorm = 69.4844, GNorm = 0.5266, lr_0 = 1.8913e-04
Loss = 4.0881e-02, PNorm = 69.4875, GNorm = 0.8699, lr_0 = 1.8900e-04
Loss = 5.0521e-02, PNorm = 69.4899, GNorm = 0.4954, lr_0 = 1.8887e-04
Loss = 3.9080e-02, PNorm = 69.4921, GNorm = 0.5976, lr_0 = 1.8874e-04
Loss = 3.8524e-02, PNorm = 69.4956, GNorm = 0.3203, lr_0 = 1.8861e-04
Loss = 3.4356e-02, PNorm = 69.4975, GNorm = 0.4189, lr_0 = 1.8848e-04
Loss = 3.9007e-02, PNorm = 69.5007, GNorm = 0.7120, lr_0 = 1.8835e-04
Loss = 3.8814e-02, PNorm = 69.5039, GNorm = 0.4303, lr_0 = 1.8822e-04
Loss = 3.7859e-02, PNorm = 69.5074, GNorm = 0.4346, lr_0 = 1.8809e-04
Loss = 4.6121e-02, PNorm = 69.5112, GNorm = 0.4590, lr_0 = 1.8797e-04
Loss = 3.9684e-02, PNorm = 69.5141, GNorm = 0.5627, lr_0 = 1.8784e-04
Loss = 3.9615e-02, PNorm = 69.5176, GNorm = 0.3270, lr_0 = 1.8771e-04
Loss = 4.0814e-02, PNorm = 69.5199, GNorm = 0.4913, lr_0 = 1.8758e-04
Loss = 3.5410e-02, PNorm = 69.5234, GNorm = 0.6131, lr_0 = 1.8745e-04
Loss = 4.3543e-02, PNorm = 69.5257, GNorm = 0.6750, lr_0 = 1.8732e-04
Loss = 3.9179e-02, PNorm = 69.5268, GNorm = 0.5522, lr_0 = 1.8719e-04
Loss = 3.7696e-02, PNorm = 69.5292, GNorm = 0.6383, lr_0 = 1.8707e-04
Loss = 3.8805e-02, PNorm = 69.5333, GNorm = 0.5407, lr_0 = 1.8694e-04
Loss = 4.3242e-02, PNorm = 69.5362, GNorm = 0.4053, lr_0 = 1.8681e-04
Loss = 4.5007e-02, PNorm = 69.5388, GNorm = 0.6839, lr_0 = 1.8668e-04
Loss = 4.2608e-02, PNorm = 69.5419, GNorm = 0.4662, lr_0 = 1.8655e-04
Loss = 3.9245e-02, PNorm = 69.5457, GNorm = 0.4211, lr_0 = 1.8643e-04
Loss = 4.9708e-02, PNorm = 69.5481, GNorm = 0.4506, lr_0 = 1.8630e-04
Loss = 4.3345e-02, PNorm = 69.5511, GNorm = 0.4021, lr_0 = 1.8617e-04
Loss = 3.6041e-02, PNorm = 69.5542, GNorm = 0.6651, lr_0 = 1.8604e-04
Loss = 3.9490e-02, PNorm = 69.5571, GNorm = 0.5113, lr_0 = 1.8592e-04
Loss = 4.2897e-02, PNorm = 69.5602, GNorm = 0.4934, lr_0 = 1.8579e-04
Loss = 3.9564e-02, PNorm = 69.5633, GNorm = 0.4488, lr_0 = 1.8566e-04
Loss = 5.0488e-02, PNorm = 69.5669, GNorm = 0.5306, lr_0 = 1.8553e-04
Loss = 3.7500e-02, PNorm = 69.5702, GNorm = 0.5088, lr_0 = 1.8541e-04
Loss = 4.7558e-02, PNorm = 69.5732, GNorm = 0.4414, lr_0 = 1.8528e-04
Loss = 4.6481e-02, PNorm = 69.5761, GNorm = 0.4948, lr_0 = 1.8515e-04
Loss = 4.2374e-02, PNorm = 69.5800, GNorm = 0.6778, lr_0 = 1.8503e-04
Loss = 3.8447e-02, PNorm = 69.5824, GNorm = 0.4004, lr_0 = 1.8490e-04
Loss = 4.0693e-02, PNorm = 69.5850, GNorm = 0.5182, lr_0 = 1.8477e-04
Loss = 4.0803e-02, PNorm = 69.5874, GNorm = 0.5348, lr_0 = 1.8465e-04
Loss = 4.3229e-02, PNorm = 69.5906, GNorm = 0.3656, lr_0 = 1.8452e-04
Loss = 4.8683e-02, PNorm = 69.5929, GNorm = 0.3524, lr_0 = 1.8439e-04
Loss = 4.3628e-02, PNorm = 69.5948, GNorm = 0.5780, lr_0 = 1.8427e-04
Loss = 4.7912e-02, PNorm = 69.5971, GNorm = 0.4130, lr_0 = 1.8414e-04
Loss = 4.2732e-02, PNorm = 69.6013, GNorm = 0.3518, lr_0 = 1.8401e-04
Loss = 4.4595e-02, PNorm = 69.6041, GNorm = 0.5973, lr_0 = 1.8389e-04
Loss = 4.6503e-02, PNorm = 69.6078, GNorm = 0.5870, lr_0 = 1.8376e-04
Loss = 4.4784e-02, PNorm = 69.6115, GNorm = 0.5420, lr_0 = 1.8364e-04
Loss = 4.7227e-02, PNorm = 69.6142, GNorm = 0.6937, lr_0 = 1.8351e-04
Loss = 4.6562e-02, PNorm = 69.6180, GNorm = 0.5047, lr_0 = 1.8338e-04
Loss = 4.3225e-02, PNorm = 69.6217, GNorm = 0.4926, lr_0 = 1.8326e-04
Loss = 4.8053e-02, PNorm = 69.6231, GNorm = 0.5909, lr_0 = 1.8313e-04
Loss = 4.5327e-02, PNorm = 69.6245, GNorm = 0.4727, lr_0 = 1.8301e-04
Loss = 3.7163e-02, PNorm = 69.6277, GNorm = 0.4730, lr_0 = 1.8288e-04
Loss = 4.2379e-02, PNorm = 69.6302, GNorm = 0.3798, lr_0 = 1.8276e-04
Loss = 4.1322e-02, PNorm = 69.6323, GNorm = 0.5770, lr_0 = 1.8263e-04
Loss = 4.4180e-02, PNorm = 69.6359, GNorm = 0.5877, lr_0 = 1.8251e-04
Loss = 3.7882e-02, PNorm = 69.6381, GNorm = 0.4139, lr_0 = 1.8238e-04
Loss = 4.1788e-02, PNorm = 69.6414, GNorm = 0.5398, lr_0 = 1.8226e-04
Loss = 4.3058e-02, PNorm = 69.6433, GNorm = 0.5372, lr_0 = 1.8213e-04
Loss = 4.8247e-02, PNorm = 69.6451, GNorm = 0.6661, lr_0 = 1.8201e-04
Loss = 4.2016e-02, PNorm = 69.6484, GNorm = 0.5515, lr_0 = 1.8188e-04
Loss = 3.9363e-02, PNorm = 69.6524, GNorm = 0.5087, lr_0 = 1.8176e-04
Loss = 4.0488e-02, PNorm = 69.6553, GNorm = 0.7787, lr_0 = 1.8163e-04
Loss = 4.9975e-02, PNorm = 69.6582, GNorm = 0.4500, lr_0 = 1.8151e-04
Loss = 4.8803e-02, PNorm = 69.6607, GNorm = 0.4819, lr_0 = 1.8138e-04
Loss = 3.9486e-02, PNorm = 69.6616, GNorm = 0.4483, lr_0 = 1.8126e-04
Loss = 4.1899e-02, PNorm = 69.6627, GNorm = 0.5609, lr_0 = 1.8114e-04
Loss = 4.3204e-02, PNorm = 69.6657, GNorm = 0.4229, lr_0 = 1.8101e-04
Loss = 4.2850e-02, PNorm = 69.6693, GNorm = 0.8491, lr_0 = 1.8089e-04
Loss = 4.3695e-02, PNorm = 69.6721, GNorm = 0.6502, lr_0 = 1.8076e-04
Loss = 4.0536e-02, PNorm = 69.6737, GNorm = 0.4288, lr_0 = 1.8064e-04
Loss = 4.3191e-02, PNorm = 69.6758, GNorm = 0.3576, lr_0 = 1.8052e-04
Loss = 3.9655e-02, PNorm = 69.6777, GNorm = 0.5723, lr_0 = 1.8039e-04
Loss = 4.4737e-02, PNorm = 69.6797, GNorm = 0.4562, lr_0 = 1.8027e-04
Loss = 5.0817e-02, PNorm = 69.6812, GNorm = 0.4831, lr_0 = 1.8015e-04
Loss = 4.2892e-02, PNorm = 69.6846, GNorm = 0.5231, lr_0 = 1.8002e-04
Loss = 4.2453e-02, PNorm = 69.6888, GNorm = 0.5448, lr_0 = 1.7990e-04
Loss = 3.9537e-02, PNorm = 69.6929, GNorm = 0.4709, lr_0 = 1.7978e-04
Loss = 3.9409e-02, PNorm = 69.6968, GNorm = 0.4236, lr_0 = 1.7965e-04
Loss = 4.3599e-02, PNorm = 69.6991, GNorm = 0.4510, lr_0 = 1.7953e-04
Loss = 3.7650e-02, PNorm = 69.7011, GNorm = 0.5290, lr_0 = 1.7941e-04
Loss = 4.2682e-02, PNorm = 69.7030, GNorm = 0.5625, lr_0 = 1.7928e-04
Loss = 4.3897e-02, PNorm = 69.7051, GNorm = 0.6004, lr_0 = 1.7916e-04
Loss = 4.1417e-02, PNorm = 69.7069, GNorm = 0.5090, lr_0 = 1.7904e-04
Loss = 4.8438e-02, PNorm = 69.7103, GNorm = 0.3997, lr_0 = 1.7892e-04
Loss = 4.5063e-02, PNorm = 69.7137, GNorm = 0.5160, lr_0 = 1.7879e-04
Loss = 4.1425e-02, PNorm = 69.7158, GNorm = 0.4368, lr_0 = 1.7867e-04
Loss = 4.7561e-02, PNorm = 69.7175, GNorm = 0.4951, lr_0 = 1.7855e-04
Loss = 4.4787e-02, PNorm = 69.7219, GNorm = 0.5453, lr_0 = 1.7843e-04
Loss = 4.6595e-02, PNorm = 69.7258, GNorm = 0.4718, lr_0 = 1.7830e-04
Loss = 4.4781e-02, PNorm = 69.7278, GNorm = 0.4377, lr_0 = 1.7818e-04
Loss = 3.5642e-02, PNorm = 69.7292, GNorm = 0.5174, lr_0 = 1.7806e-04
Loss = 4.6785e-02, PNorm = 69.7299, GNorm = 0.4872, lr_0 = 1.7794e-04
Loss = 4.5131e-02, PNorm = 69.7327, GNorm = 0.5284, lr_0 = 1.7782e-04
Validation mae = 0.385342
Epoch 23
Loss = 3.8160e-02, PNorm = 69.7364, GNorm = 0.4537, lr_0 = 1.7769e-04
Loss = 3.5997e-02, PNorm = 69.7388, GNorm = 0.5450, lr_0 = 1.7757e-04
Loss = 2.9734e-02, PNorm = 69.7408, GNorm = 0.5698, lr_0 = 1.7745e-04
Loss = 3.0896e-02, PNorm = 69.7439, GNorm = 0.4289, lr_0 = 1.7733e-04
Loss = 4.1511e-02, PNorm = 69.7482, GNorm = 0.3963, lr_0 = 1.7721e-04
Loss = 3.4231e-02, PNorm = 69.7524, GNorm = 0.5289, lr_0 = 1.7709e-04
Loss = 4.0566e-02, PNorm = 69.7558, GNorm = 0.4800, lr_0 = 1.7696e-04
Loss = 4.7226e-02, PNorm = 69.7574, GNorm = 0.5239, lr_0 = 1.7684e-04
Loss = 3.7318e-02, PNorm = 69.7594, GNorm = 0.4520, lr_0 = 1.7672e-04
Loss = 3.9900e-02, PNorm = 69.7618, GNorm = 0.5445, lr_0 = 1.7660e-04
Loss = 4.1068e-02, PNorm = 69.7644, GNorm = 0.7065, lr_0 = 1.7648e-04
Loss = 3.6909e-02, PNorm = 69.7683, GNorm = 0.4215, lr_0 = 1.7636e-04
Loss = 4.1121e-02, PNorm = 69.7698, GNorm = 0.5366, lr_0 = 1.7624e-04
Loss = 3.7648e-02, PNorm = 69.7716, GNorm = 0.6955, lr_0 = 1.7612e-04
Loss = 3.6721e-02, PNorm = 69.7740, GNorm = 0.4117, lr_0 = 1.7600e-04
Loss = 3.2842e-02, PNorm = 69.7777, GNorm = 0.6377, lr_0 = 1.7588e-04
Loss = 4.5051e-02, PNorm = 69.7819, GNorm = 0.4469, lr_0 = 1.7576e-04
Loss = 3.3476e-02, PNorm = 69.7862, GNorm = 0.4541, lr_0 = 1.7564e-04
Loss = 3.9838e-02, PNorm = 69.7887, GNorm = 0.5810, lr_0 = 1.7552e-04
Loss = 3.9076e-02, PNorm = 69.7905, GNorm = 0.3640, lr_0 = 1.7540e-04
Loss = 4.2146e-02, PNorm = 69.7925, GNorm = 0.4987, lr_0 = 1.7528e-04
Loss = 3.5793e-02, PNorm = 69.7963, GNorm = 0.4350, lr_0 = 1.7516e-04
Loss = 4.0599e-02, PNorm = 69.7996, GNorm = 0.3775, lr_0 = 1.7504e-04
Loss = 4.0232e-02, PNorm = 69.8021, GNorm = 0.6483, lr_0 = 1.7492e-04
Loss = 3.7996e-02, PNorm = 69.8043, GNorm = 0.6217, lr_0 = 1.7480e-04
Loss = 3.9800e-02, PNorm = 69.8066, GNorm = 0.6193, lr_0 = 1.7468e-04
Loss = 3.9214e-02, PNorm = 69.8091, GNorm = 0.3780, lr_0 = 1.7456e-04
Loss = 3.7353e-02, PNorm = 69.8136, GNorm = 0.5164, lr_0 = 1.7444e-04
Loss = 4.0194e-02, PNorm = 69.8166, GNorm = 0.5829, lr_0 = 1.7432e-04
Loss = 4.2907e-02, PNorm = 69.8206, GNorm = 0.5499, lr_0 = 1.7420e-04
Loss = 3.8725e-02, PNorm = 69.8239, GNorm = 0.4683, lr_0 = 1.7408e-04
Loss = 3.5479e-02, PNorm = 69.8277, GNorm = 0.4032, lr_0 = 1.7396e-04
Loss = 4.2278e-02, PNorm = 69.8300, GNorm = 0.8117, lr_0 = 1.7384e-04
Loss = 4.1121e-02, PNorm = 69.8313, GNorm = 0.4550, lr_0 = 1.7372e-04
Loss = 4.0345e-02, PNorm = 69.8320, GNorm = 0.6498, lr_0 = 1.7360e-04
Loss = 4.4541e-02, PNorm = 69.8341, GNorm = 0.4690, lr_0 = 1.7348e-04
Loss = 4.9375e-02, PNorm = 69.8368, GNorm = 0.4014, lr_0 = 1.7336e-04
Loss = 4.0817e-02, PNorm = 69.8408, GNorm = 0.6151, lr_0 = 1.7325e-04
Loss = 3.6106e-02, PNorm = 69.8432, GNorm = 0.5454, lr_0 = 1.7313e-04
Loss = 4.1265e-02, PNorm = 69.8484, GNorm = 0.5225, lr_0 = 1.7301e-04
Loss = 3.6124e-02, PNorm = 69.8518, GNorm = 0.5891, lr_0 = 1.7289e-04
Loss = 3.9636e-02, PNorm = 69.8557, GNorm = 0.4705, lr_0 = 1.7277e-04
Loss = 3.6332e-02, PNorm = 69.8599, GNorm = 0.6928, lr_0 = 1.7265e-04
Loss = 4.4929e-02, PNorm = 69.8621, GNorm = 0.6229, lr_0 = 1.7253e-04
Loss = 3.6206e-02, PNorm = 69.8643, GNorm = 0.4511, lr_0 = 1.7242e-04
Loss = 4.0839e-02, PNorm = 69.8673, GNorm = 0.4002, lr_0 = 1.7230e-04
Loss = 3.6227e-02, PNorm = 69.8699, GNorm = 0.4655, lr_0 = 1.7218e-04
Loss = 3.6542e-02, PNorm = 69.8711, GNorm = 0.5122, lr_0 = 1.7206e-04
Loss = 3.7618e-02, PNorm = 69.8726, GNorm = 0.6359, lr_0 = 1.7194e-04
Loss = 3.7483e-02, PNorm = 69.8743, GNorm = 0.3934, lr_0 = 1.7183e-04
Loss = 3.8386e-02, PNorm = 69.8769, GNorm = 0.4324, lr_0 = 1.7171e-04
Loss = 3.7542e-02, PNorm = 69.8783, GNorm = 0.4710, lr_0 = 1.7159e-04
Loss = 3.9669e-02, PNorm = 69.8804, GNorm = 0.6202, lr_0 = 1.7147e-04
Loss = 4.0093e-02, PNorm = 69.8814, GNorm = 0.5237, lr_0 = 1.7136e-04
Loss = 3.8164e-02, PNorm = 69.8833, GNorm = 0.5048, lr_0 = 1.7124e-04
Loss = 4.5590e-02, PNorm = 69.8849, GNorm = 0.6264, lr_0 = 1.7112e-04
Loss = 3.9386e-02, PNorm = 69.8878, GNorm = 0.4543, lr_0 = 1.7100e-04
Loss = 3.5073e-02, PNorm = 69.8910, GNorm = 0.5118, lr_0 = 1.7089e-04
Loss = 4.1379e-02, PNorm = 69.8918, GNorm = 0.6870, lr_0 = 1.7077e-04
Loss = 3.6340e-02, PNorm = 69.8939, GNorm = 0.4659, lr_0 = 1.7065e-04
Loss = 5.0438e-02, PNorm = 69.8982, GNorm = 0.5606, lr_0 = 1.7054e-04
Loss = 3.9194e-02, PNorm = 69.9028, GNorm = 0.4159, lr_0 = 1.7042e-04
Loss = 4.2644e-02, PNorm = 69.9052, GNorm = 0.5087, lr_0 = 1.7030e-04
Loss = 4.1847e-02, PNorm = 69.9074, GNorm = 0.4467, lr_0 = 1.7019e-04
Loss = 3.7078e-02, PNorm = 69.9092, GNorm = 0.4975, lr_0 = 1.7007e-04
Loss = 4.3286e-02, PNorm = 69.9121, GNorm = 0.6061, lr_0 = 1.6995e-04
Loss = 3.4755e-02, PNorm = 69.9137, GNorm = 0.4256, lr_0 = 1.6984e-04
Loss = 4.5336e-02, PNorm = 69.9152, GNorm = 0.7085, lr_0 = 1.6972e-04
Loss = 3.8680e-02, PNorm = 69.9192, GNorm = 0.3788, lr_0 = 1.6960e-04
Loss = 4.0938e-02, PNorm = 69.9228, GNorm = 0.8274, lr_0 = 1.6949e-04
Loss = 4.1926e-02, PNorm = 69.9259, GNorm = 0.2940, lr_0 = 1.6937e-04
Loss = 4.4960e-02, PNorm = 69.9302, GNorm = 0.5771, lr_0 = 1.6926e-04
Loss = 4.0291e-02, PNorm = 69.9341, GNorm = 0.7883, lr_0 = 1.6914e-04
Loss = 3.7563e-02, PNorm = 69.9370, GNorm = 0.5462, lr_0 = 1.6902e-04
Loss = 4.4490e-02, PNorm = 69.9394, GNorm = 0.6420, lr_0 = 1.6891e-04
Loss = 3.9458e-02, PNorm = 69.9422, GNorm = 0.6130, lr_0 = 1.6879e-04
Loss = 4.0720e-02, PNorm = 69.9454, GNorm = 0.6401, lr_0 = 1.6868e-04
Loss = 4.6745e-02, PNorm = 69.9473, GNorm = 0.7205, lr_0 = 1.6856e-04
Loss = 3.8872e-02, PNorm = 69.9490, GNorm = 0.6788, lr_0 = 1.6845e-04
Loss = 4.0952e-02, PNorm = 69.9528, GNorm = 0.4264, lr_0 = 1.6833e-04
Loss = 4.9192e-02, PNorm = 69.9547, GNorm = 0.4947, lr_0 = 1.6821e-04
Loss = 4.3567e-02, PNorm = 69.9586, GNorm = 0.3162, lr_0 = 1.6810e-04
Loss = 5.0111e-02, PNorm = 69.9632, GNorm = 0.6995, lr_0 = 1.6798e-04
Loss = 4.0907e-02, PNorm = 69.9650, GNorm = 0.4808, lr_0 = 1.6787e-04
Loss = 3.9499e-02, PNorm = 69.9671, GNorm = 0.6069, lr_0 = 1.6775e-04
Loss = 4.4596e-02, PNorm = 69.9705, GNorm = 0.4505, lr_0 = 1.6764e-04
Loss = 3.9622e-02, PNorm = 69.9726, GNorm = 0.4481, lr_0 = 1.6752e-04
Loss = 4.0684e-02, PNorm = 69.9746, GNorm = 0.4339, lr_0 = 1.6741e-04
Loss = 4.8072e-02, PNorm = 69.9764, GNorm = 0.5465, lr_0 = 1.6729e-04
Loss = 4.3294e-02, PNorm = 69.9787, GNorm = 0.4842, lr_0 = 1.6718e-04
Loss = 4.4044e-02, PNorm = 69.9807, GNorm = 0.4701, lr_0 = 1.6707e-04
Loss = 5.0775e-02, PNorm = 69.9841, GNorm = 0.5162, lr_0 = 1.6695e-04
Loss = 4.0957e-02, PNorm = 69.9877, GNorm = 0.4456, lr_0 = 1.6684e-04
Loss = 3.9451e-02, PNorm = 69.9910, GNorm = 0.3952, lr_0 = 1.6672e-04
Loss = 3.7303e-02, PNorm = 69.9932, GNorm = 0.5149, lr_0 = 1.6661e-04
Loss = 4.0285e-02, PNorm = 69.9938, GNorm = 0.5507, lr_0 = 1.6649e-04
Loss = 3.9950e-02, PNorm = 69.9958, GNorm = 0.5045, lr_0 = 1.6638e-04
Loss = 4.1352e-02, PNorm = 69.9969, GNorm = 0.5186, lr_0 = 1.6627e-04
Loss = 4.6530e-02, PNorm = 69.9991, GNorm = 0.4060, lr_0 = 1.6615e-04
Loss = 4.2942e-02, PNorm = 70.0011, GNorm = 0.5388, lr_0 = 1.6604e-04
Loss = 5.0970e-02, PNorm = 70.0034, GNorm = 0.6821, lr_0 = 1.6592e-04
Loss = 3.8537e-02, PNorm = 70.0042, GNorm = 0.5697, lr_0 = 1.6581e-04
Loss = 4.4161e-02, PNorm = 70.0055, GNorm = 0.3908, lr_0 = 1.6570e-04
Loss = 3.7492e-02, PNorm = 70.0075, GNorm = 0.6066, lr_0 = 1.6558e-04
Loss = 4.0773e-02, PNorm = 70.0092, GNorm = 0.4977, lr_0 = 1.6547e-04
Loss = 3.9059e-02, PNorm = 70.0119, GNorm = 0.4341, lr_0 = 1.6536e-04
Loss = 4.2498e-02, PNorm = 70.0137, GNorm = 0.4638, lr_0 = 1.6524e-04
Loss = 4.0554e-02, PNorm = 70.0157, GNorm = 0.7800, lr_0 = 1.6513e-04
Loss = 4.1023e-02, PNorm = 70.0182, GNorm = 0.5125, lr_0 = 1.6502e-04
Loss = 4.6077e-02, PNorm = 70.0212, GNorm = 0.4948, lr_0 = 1.6490e-04
Loss = 4.1718e-02, PNorm = 70.0238, GNorm = 0.5790, lr_0 = 1.6479e-04
Loss = 4.0763e-02, PNorm = 70.0247, GNorm = 0.4180, lr_0 = 1.6468e-04
Loss = 4.2006e-02, PNorm = 70.0273, GNorm = 0.7190, lr_0 = 1.6457e-04
Loss = 4.1669e-02, PNorm = 70.0306, GNorm = 0.4267, lr_0 = 1.6445e-04
Loss = 3.9825e-02, PNorm = 70.0323, GNorm = 0.6008, lr_0 = 1.6434e-04
Loss = 3.9382e-02, PNorm = 70.0352, GNorm = 0.4260, lr_0 = 1.6423e-04
Loss = 4.6054e-02, PNorm = 70.0397, GNorm = 0.4414, lr_0 = 1.6412e-04
Loss = 4.0698e-02, PNorm = 70.0428, GNorm = 0.4726, lr_0 = 1.6400e-04
Loss = 3.7241e-02, PNorm = 70.0457, GNorm = 0.4365, lr_0 = 1.6389e-04
Loss = 3.9543e-02, PNorm = 70.0492, GNorm = 0.3496, lr_0 = 1.6378e-04
Validation mae = 0.386775
Epoch 24
Loss = 3.7258e-02, PNorm = 70.0511, GNorm = 0.6231, lr_0 = 1.6367e-04
Loss = 3.4654e-02, PNorm = 70.0537, GNorm = 0.4487, lr_0 = 1.6355e-04
Loss = 4.3048e-02, PNorm = 70.0569, GNorm = 0.6838, lr_0 = 1.6344e-04
Loss = 3.2864e-02, PNorm = 70.0603, GNorm = 0.6618, lr_0 = 1.6333e-04
Loss = 3.6744e-02, PNorm = 70.0626, GNorm = 0.4850, lr_0 = 1.6322e-04
Loss = 3.8756e-02, PNorm = 70.0644, GNorm = 0.3517, lr_0 = 1.6311e-04
Loss = 3.6013e-02, PNorm = 70.0668, GNorm = 0.4234, lr_0 = 1.6299e-04
Loss = 3.5982e-02, PNorm = 70.0690, GNorm = 0.4776, lr_0 = 1.6288e-04
Loss = 3.5368e-02, PNorm = 70.0717, GNorm = 0.3940, lr_0 = 1.6277e-04
Loss = 3.6035e-02, PNorm = 70.0739, GNorm = 0.3797, lr_0 = 1.6266e-04
Loss = 3.5438e-02, PNorm = 70.0753, GNorm = 0.4667, lr_0 = 1.6255e-04
Loss = 3.3114e-02, PNorm = 70.0791, GNorm = 0.4587, lr_0 = 1.6244e-04
Loss = 3.6040e-02, PNorm = 70.0825, GNorm = 0.6005, lr_0 = 1.6233e-04
Loss = 3.5337e-02, PNorm = 70.0841, GNorm = 0.3667, lr_0 = 1.6221e-04
Loss = 4.0466e-02, PNorm = 70.0863, GNorm = 0.6388, lr_0 = 1.6210e-04
Loss = 3.6045e-02, PNorm = 70.0876, GNorm = 0.4247, lr_0 = 1.6199e-04
Loss = 4.4190e-02, PNorm = 70.0896, GNorm = 0.6575, lr_0 = 1.6188e-04
Loss = 4.0985e-02, PNorm = 70.0920, GNorm = 0.4938, lr_0 = 1.6177e-04
Loss = 3.9063e-02, PNorm = 70.0935, GNorm = 0.6653, lr_0 = 1.6166e-04
Loss = 3.1709e-02, PNorm = 70.0963, GNorm = 0.3941, lr_0 = 1.6155e-04
Loss = 4.3938e-02, PNorm = 70.1000, GNorm = 0.4396, lr_0 = 1.6144e-04
Loss = 3.5618e-02, PNorm = 70.1036, GNorm = 0.4489, lr_0 = 1.6133e-04
Loss = 3.6652e-02, PNorm = 70.1061, GNorm = 0.4489, lr_0 = 1.6122e-04
Loss = 4.1958e-02, PNorm = 70.1085, GNorm = 0.5324, lr_0 = 1.6111e-04
Loss = 3.4898e-02, PNorm = 70.1099, GNorm = 0.4609, lr_0 = 1.6100e-04
Loss = 3.1831e-02, PNorm = 70.1114, GNorm = 0.7639, lr_0 = 1.6089e-04
Loss = 3.8483e-02, PNorm = 70.1141, GNorm = 0.5322, lr_0 = 1.6078e-04
Loss = 3.4139e-02, PNorm = 70.1167, GNorm = 0.4637, lr_0 = 1.6067e-04
Loss = 3.7083e-02, PNorm = 70.1180, GNorm = 0.4816, lr_0 = 1.6056e-04
Loss = 3.3069e-02, PNorm = 70.1211, GNorm = 0.5082, lr_0 = 1.6045e-04
Loss = 3.5345e-02, PNorm = 70.1241, GNorm = 0.6675, lr_0 = 1.6034e-04
Loss = 3.4142e-02, PNorm = 70.1279, GNorm = 0.5102, lr_0 = 1.6023e-04
Loss = 3.2751e-02, PNorm = 70.1306, GNorm = 0.4173, lr_0 = 1.6012e-04
Loss = 4.2835e-02, PNorm = 70.1333, GNorm = 0.4302, lr_0 = 1.6001e-04
Loss = 3.5095e-02, PNorm = 70.1356, GNorm = 0.4726, lr_0 = 1.5990e-04
Loss = 3.8722e-02, PNorm = 70.1371, GNorm = 0.5032, lr_0 = 1.5979e-04
Loss = 3.2440e-02, PNorm = 70.1393, GNorm = 0.3852, lr_0 = 1.5968e-04
Loss = 3.7546e-02, PNorm = 70.1410, GNorm = 0.4588, lr_0 = 1.5957e-04
Loss = 4.0177e-02, PNorm = 70.1425, GNorm = 0.5268, lr_0 = 1.5946e-04
Loss = 3.5299e-02, PNorm = 70.1444, GNorm = 0.4890, lr_0 = 1.5935e-04
Loss = 3.3967e-02, PNorm = 70.1462, GNorm = 0.5434, lr_0 = 1.5924e-04
Loss = 3.6457e-02, PNorm = 70.1491, GNorm = 0.4175, lr_0 = 1.5913e-04
Loss = 3.9563e-02, PNorm = 70.1521, GNorm = 0.5547, lr_0 = 1.5902e-04
Loss = 3.5502e-02, PNorm = 70.1555, GNorm = 0.3499, lr_0 = 1.5891e-04
Loss = 3.7750e-02, PNorm = 70.1589, GNorm = 0.5617, lr_0 = 1.5880e-04
Loss = 3.9147e-02, PNorm = 70.1604, GNorm = 0.3949, lr_0 = 1.5870e-04
Loss = 4.0527e-02, PNorm = 70.1615, GNorm = 0.4956, lr_0 = 1.5859e-04
Loss = 3.8386e-02, PNorm = 70.1625, GNorm = 0.6302, lr_0 = 1.5848e-04
Loss = 3.5781e-02, PNorm = 70.1637, GNorm = 0.6342, lr_0 = 1.5837e-04
Loss = 4.6368e-02, PNorm = 70.1665, GNorm = 0.4517, lr_0 = 1.5826e-04
Loss = 3.6171e-02, PNorm = 70.1685, GNorm = 0.5161, lr_0 = 1.5815e-04
Loss = 3.7480e-02, PNorm = 70.1712, GNorm = 0.5559, lr_0 = 1.5804e-04
Loss = 3.9277e-02, PNorm = 70.1735, GNorm = 0.6154, lr_0 = 1.5794e-04
Loss = 3.9736e-02, PNorm = 70.1764, GNorm = 0.5802, lr_0 = 1.5783e-04
Loss = 3.8202e-02, PNorm = 70.1794, GNorm = 0.4426, lr_0 = 1.5772e-04
Loss = 4.1557e-02, PNorm = 70.1830, GNorm = 0.9528, lr_0 = 1.5761e-04
Loss = 3.8690e-02, PNorm = 70.1852, GNorm = 0.5188, lr_0 = 1.5750e-04
Loss = 3.5692e-02, PNorm = 70.1877, GNorm = 0.5685, lr_0 = 1.5740e-04
Loss = 4.0549e-02, PNorm = 70.1900, GNorm = 0.4633, lr_0 = 1.5729e-04
Loss = 3.2308e-02, PNorm = 70.1905, GNorm = 0.4667, lr_0 = 1.5718e-04
Loss = 3.4273e-02, PNorm = 70.1916, GNorm = 0.3885, lr_0 = 1.5707e-04
Loss = 3.3833e-02, PNorm = 70.1940, GNorm = 0.3837, lr_0 = 1.5697e-04
Loss = 3.6042e-02, PNorm = 70.1950, GNorm = 0.8060, lr_0 = 1.5686e-04
Loss = 4.4336e-02, PNorm = 70.1950, GNorm = 0.4973, lr_0 = 1.5675e-04
Loss = 3.9903e-02, PNorm = 70.1968, GNorm = 0.5043, lr_0 = 1.5664e-04
Loss = 3.4990e-02, PNorm = 70.1986, GNorm = 0.6435, lr_0 = 1.5654e-04
Loss = 4.5271e-02, PNorm = 70.2015, GNorm = 0.5995, lr_0 = 1.5643e-04
Loss = 3.4961e-02, PNorm = 70.2038, GNorm = 0.4013, lr_0 = 1.5632e-04
Loss = 3.5966e-02, PNorm = 70.2068, GNorm = 0.5962, lr_0 = 1.5621e-04
Loss = 4.0759e-02, PNorm = 70.2097, GNorm = 0.5021, lr_0 = 1.5611e-04
Loss = 3.4073e-02, PNorm = 70.2124, GNorm = 0.6074, lr_0 = 1.5600e-04
Loss = 3.8822e-02, PNorm = 70.2137, GNorm = 0.4651, lr_0 = 1.5589e-04
Loss = 4.4180e-02, PNorm = 70.2163, GNorm = 0.3555, lr_0 = 1.5579e-04
Loss = 3.6579e-02, PNorm = 70.2203, GNorm = 0.3575, lr_0 = 1.5568e-04
Loss = 3.8719e-02, PNorm = 70.2238, GNorm = 0.6287, lr_0 = 1.5557e-04
Loss = 3.2983e-02, PNorm = 70.2262, GNorm = 0.3694, lr_0 = 1.5547e-04
Loss = 3.5809e-02, PNorm = 70.2276, GNorm = 0.6364, lr_0 = 1.5536e-04
Loss = 4.0119e-02, PNorm = 70.2294, GNorm = 0.5607, lr_0 = 1.5525e-04
Loss = 4.4261e-02, PNorm = 70.2318, GNorm = 0.5300, lr_0 = 1.5515e-04
Loss = 3.8224e-02, PNorm = 70.2339, GNorm = 0.5263, lr_0 = 1.5504e-04
Loss = 3.8712e-02, PNorm = 70.2367, GNorm = 0.5171, lr_0 = 1.5493e-04
Loss = 3.9536e-02, PNorm = 70.2395, GNorm = 0.4134, lr_0 = 1.5483e-04
Loss = 4.0755e-02, PNorm = 70.2418, GNorm = 0.6066, lr_0 = 1.5472e-04
Loss = 3.8857e-02, PNorm = 70.2433, GNorm = 0.4032, lr_0 = 1.5462e-04
Loss = 3.4057e-02, PNorm = 70.2438, GNorm = 0.4587, lr_0 = 1.5451e-04
Loss = 3.7949e-02, PNorm = 70.2444, GNorm = 0.4642, lr_0 = 1.5440e-04
Loss = 4.2546e-02, PNorm = 70.2460, GNorm = 0.6983, lr_0 = 1.5430e-04
Loss = 4.0207e-02, PNorm = 70.2492, GNorm = 0.5577, lr_0 = 1.5419e-04
Loss = 3.9630e-02, PNorm = 70.2534, GNorm = 0.6592, lr_0 = 1.5409e-04
Loss = 4.3471e-02, PNorm = 70.2569, GNorm = 0.5962, lr_0 = 1.5398e-04
Loss = 3.5849e-02, PNorm = 70.2589, GNorm = 0.7512, lr_0 = 1.5388e-04
Loss = 3.6672e-02, PNorm = 70.2610, GNorm = 0.5212, lr_0 = 1.5377e-04
Loss = 4.1418e-02, PNorm = 70.2633, GNorm = 0.4842, lr_0 = 1.5367e-04
Loss = 4.0184e-02, PNorm = 70.2664, GNorm = 0.5170, lr_0 = 1.5356e-04
Loss = 4.1289e-02, PNorm = 70.2695, GNorm = 0.3992, lr_0 = 1.5346e-04
Loss = 3.4816e-02, PNorm = 70.2718, GNorm = 0.4161, lr_0 = 1.5335e-04
Loss = 4.0153e-02, PNorm = 70.2732, GNorm = 0.5307, lr_0 = 1.5325e-04
Loss = 3.5406e-02, PNorm = 70.2754, GNorm = 0.4183, lr_0 = 1.5314e-04
Loss = 5.1496e-02, PNorm = 70.2770, GNorm = 0.7617, lr_0 = 1.5304e-04
Loss = 3.7448e-02, PNorm = 70.2805, GNorm = 0.4295, lr_0 = 1.5293e-04
Loss = 4.4494e-02, PNorm = 70.2846, GNorm = 0.5460, lr_0 = 1.5283e-04
Loss = 4.6516e-02, PNorm = 70.2879, GNorm = 0.9687, lr_0 = 1.5272e-04
Loss = 4.3214e-02, PNorm = 70.2918, GNorm = 0.4716, lr_0 = 1.5262e-04
Loss = 3.8887e-02, PNorm = 70.2947, GNorm = 0.3854, lr_0 = 1.5251e-04
Loss = 4.1822e-02, PNorm = 70.2968, GNorm = 0.4824, lr_0 = 1.5241e-04
Loss = 4.1724e-02, PNorm = 70.2991, GNorm = 0.6899, lr_0 = 1.5230e-04
Loss = 4.2983e-02, PNorm = 70.3008, GNorm = 0.4354, lr_0 = 1.5220e-04
Loss = 4.2037e-02, PNorm = 70.3032, GNorm = 0.6239, lr_0 = 1.5209e-04
Loss = 3.6946e-02, PNorm = 70.3052, GNorm = 0.5824, lr_0 = 1.5199e-04
Loss = 3.5064e-02, PNorm = 70.3073, GNorm = 0.4951, lr_0 = 1.5189e-04
Loss = 4.7307e-02, PNorm = 70.3097, GNorm = 0.4574, lr_0 = 1.5178e-04
Loss = 4.5884e-02, PNorm = 70.3115, GNorm = 0.8332, lr_0 = 1.5168e-04
Loss = 3.8749e-02, PNorm = 70.3119, GNorm = 0.5729, lr_0 = 1.5157e-04
Loss = 3.5208e-02, PNorm = 70.3143, GNorm = 0.4956, lr_0 = 1.5147e-04
Loss = 3.7423e-02, PNorm = 70.3166, GNorm = 0.5482, lr_0 = 1.5137e-04
Loss = 4.7453e-02, PNorm = 70.3188, GNorm = 0.7649, lr_0 = 1.5126e-04
Loss = 4.3438e-02, PNorm = 70.3203, GNorm = 0.7224, lr_0 = 1.5116e-04
Loss = 3.6939e-02, PNorm = 70.3233, GNorm = 0.4431, lr_0 = 1.5106e-04
Loss = 3.8087e-02, PNorm = 70.3243, GNorm = 0.5178, lr_0 = 1.5095e-04
Loss = 3.9887e-02, PNorm = 70.3258, GNorm = 0.4565, lr_0 = 1.5085e-04
Validation mae = 0.388999
Epoch 25
Loss = 3.7952e-02, PNorm = 70.3286, GNorm = 0.5028, lr_0 = 1.5075e-04
Loss = 3.5541e-02, PNorm = 70.3315, GNorm = 0.3514, lr_0 = 1.5064e-04
Loss = 2.9838e-02, PNorm = 70.3347, GNorm = 0.3817, lr_0 = 1.5054e-04
Loss = 4.1795e-02, PNorm = 70.3376, GNorm = 0.4544, lr_0 = 1.5044e-04
Loss = 3.4793e-02, PNorm = 70.3408, GNorm = 0.4923, lr_0 = 1.5033e-04
Loss = 3.7400e-02, PNorm = 70.3431, GNorm = 0.4155, lr_0 = 1.5023e-04
Loss = 2.8344e-02, PNorm = 70.3456, GNorm = 0.8350, lr_0 = 1.5013e-04
Loss = 3.6773e-02, PNorm = 70.3500, GNorm = 0.4838, lr_0 = 1.5002e-04
Loss = 3.2133e-02, PNorm = 70.3528, GNorm = 0.5953, lr_0 = 1.4992e-04
Loss = 4.0283e-02, PNorm = 70.3554, GNorm = 0.4308, lr_0 = 1.4982e-04
Loss = 3.6347e-02, PNorm = 70.3570, GNorm = 0.3960, lr_0 = 1.4972e-04
Loss = 3.6282e-02, PNorm = 70.3587, GNorm = 0.4306, lr_0 = 1.4961e-04
Loss = 3.5860e-02, PNorm = 70.3604, GNorm = 0.5926, lr_0 = 1.4951e-04
Loss = 3.4084e-02, PNorm = 70.3618, GNorm = 0.4413, lr_0 = 1.4941e-04
Loss = 3.4421e-02, PNorm = 70.3627, GNorm = 0.7515, lr_0 = 1.4931e-04
Loss = 2.9647e-02, PNorm = 70.3655, GNorm = 0.3479, lr_0 = 1.4920e-04
Loss = 3.3916e-02, PNorm = 70.3675, GNorm = 0.3939, lr_0 = 1.4910e-04
Loss = 3.3253e-02, PNorm = 70.3689, GNorm = 0.4506, lr_0 = 1.4900e-04
Loss = 3.3386e-02, PNorm = 70.3701, GNorm = 0.4623, lr_0 = 1.4890e-04
Loss = 3.9236e-02, PNorm = 70.3718, GNorm = 0.4076, lr_0 = 1.4880e-04
Loss = 3.6297e-02, PNorm = 70.3743, GNorm = 0.5275, lr_0 = 1.4869e-04
Loss = 3.3421e-02, PNorm = 70.3769, GNorm = 0.4502, lr_0 = 1.4859e-04
Loss = 3.9611e-02, PNorm = 70.3780, GNorm = 0.4402, lr_0 = 1.4849e-04
Loss = 3.4952e-02, PNorm = 70.3795, GNorm = 0.4409, lr_0 = 1.4839e-04
Loss = 3.3001e-02, PNorm = 70.3807, GNorm = 0.4160, lr_0 = 1.4829e-04
Loss = 2.9080e-02, PNorm = 70.3823, GNorm = 0.4596, lr_0 = 1.4818e-04
Loss = 3.0042e-02, PNorm = 70.3844, GNorm = 0.3826, lr_0 = 1.4808e-04
Loss = 3.0183e-02, PNorm = 70.3867, GNorm = 0.4391, lr_0 = 1.4798e-04
Loss = 3.3134e-02, PNorm = 70.3888, GNorm = 0.6271, lr_0 = 1.4788e-04
Loss = 2.9199e-02, PNorm = 70.3908, GNorm = 0.5872, lr_0 = 1.4778e-04
Loss = 3.6667e-02, PNorm = 70.3934, GNorm = 0.8171, lr_0 = 1.4768e-04
Loss = 3.5588e-02, PNorm = 70.3970, GNorm = 0.4568, lr_0 = 1.4758e-04
Loss = 3.1785e-02, PNorm = 70.3992, GNorm = 0.3747, lr_0 = 1.4748e-04
Loss = 3.5797e-02, PNorm = 70.4007, GNorm = 0.4347, lr_0 = 1.4737e-04
Loss = 4.1195e-02, PNorm = 70.4024, GNorm = 0.6458, lr_0 = 1.4727e-04
Loss = 3.2693e-02, PNorm = 70.4045, GNorm = 0.4602, lr_0 = 1.4717e-04
Loss = 3.3163e-02, PNorm = 70.4074, GNorm = 0.4217, lr_0 = 1.4707e-04
Loss = 3.4077e-02, PNorm = 70.4092, GNorm = 0.5278, lr_0 = 1.4697e-04
Loss = 3.6141e-02, PNorm = 70.4106, GNorm = 0.4175, lr_0 = 1.4687e-04
Loss = 3.4762e-02, PNorm = 70.4117, GNorm = 0.6783, lr_0 = 1.4677e-04
Loss = 3.5932e-02, PNorm = 70.4125, GNorm = 0.4262, lr_0 = 1.4667e-04
Loss = 3.4109e-02, PNorm = 70.4139, GNorm = 0.5110, lr_0 = 1.4657e-04
Loss = 3.8786e-02, PNorm = 70.4164, GNorm = 0.3744, lr_0 = 1.4647e-04
Loss = 3.4165e-02, PNorm = 70.4192, GNorm = 0.4942, lr_0 = 1.4637e-04
Loss = 3.2557e-02, PNorm = 70.4223, GNorm = 0.3804, lr_0 = 1.4627e-04
Loss = 4.5890e-02, PNorm = 70.4257, GNorm = 0.6403, lr_0 = 1.4617e-04
Loss = 3.3850e-02, PNorm = 70.4283, GNorm = 0.4682, lr_0 = 1.4607e-04
Loss = 3.4216e-02, PNorm = 70.4305, GNorm = 0.5172, lr_0 = 1.4597e-04
Loss = 3.4876e-02, PNorm = 70.4317, GNorm = 0.5918, lr_0 = 1.4587e-04
Loss = 3.1799e-02, PNorm = 70.4334, GNorm = 0.5117, lr_0 = 1.4577e-04
Loss = 3.5287e-02, PNorm = 70.4366, GNorm = 0.5263, lr_0 = 1.4567e-04
Loss = 4.1270e-02, PNorm = 70.4386, GNorm = 0.6584, lr_0 = 1.4557e-04
Loss = 3.4409e-02, PNorm = 70.4399, GNorm = 0.4366, lr_0 = 1.4547e-04
Loss = 3.1346e-02, PNorm = 70.4418, GNorm = 0.3189, lr_0 = 1.4537e-04
Loss = 4.2129e-02, PNorm = 70.4437, GNorm = 0.6762, lr_0 = 1.4527e-04
Loss = 3.7917e-02, PNorm = 70.4453, GNorm = 0.4145, lr_0 = 1.4517e-04
Loss = 3.4608e-02, PNorm = 70.4477, GNorm = 0.4402, lr_0 = 1.4507e-04
Loss = 3.5431e-02, PNorm = 70.4502, GNorm = 0.4739, lr_0 = 1.4497e-04
Loss = 3.9906e-02, PNorm = 70.4521, GNorm = 0.5248, lr_0 = 1.4487e-04
Loss = 3.6137e-02, PNorm = 70.4541, GNorm = 0.9341, lr_0 = 1.4477e-04
Loss = 3.4977e-02, PNorm = 70.4559, GNorm = 0.3551, lr_0 = 1.4467e-04
Loss = 3.7854e-02, PNorm = 70.4570, GNorm = 0.5258, lr_0 = 1.4457e-04
Loss = 3.5918e-02, PNorm = 70.4595, GNorm = 0.4794, lr_0 = 1.4447e-04
Loss = 3.8132e-02, PNorm = 70.4629, GNorm = 0.3926, lr_0 = 1.4438e-04
Loss = 3.7960e-02, PNorm = 70.4658, GNorm = 0.5266, lr_0 = 1.4428e-04
Loss = 3.9764e-02, PNorm = 70.4677, GNorm = 0.5322, lr_0 = 1.4418e-04
Loss = 4.2444e-02, PNorm = 70.4696, GNorm = 0.4707, lr_0 = 1.4408e-04
Loss = 3.8492e-02, PNorm = 70.4711, GNorm = 0.5118, lr_0 = 1.4398e-04
Loss = 3.4910e-02, PNorm = 70.4725, GNorm = 0.4761, lr_0 = 1.4388e-04
Loss = 3.6402e-02, PNorm = 70.4740, GNorm = 0.6029, lr_0 = 1.4378e-04
Loss = 4.3577e-02, PNorm = 70.4759, GNorm = 0.4547, lr_0 = 1.4368e-04
Loss = 3.3787e-02, PNorm = 70.4795, GNorm = 0.4031, lr_0 = 1.4359e-04
Loss = 3.4712e-02, PNorm = 70.4827, GNorm = 0.5938, lr_0 = 1.4349e-04
Loss = 3.3965e-02, PNorm = 70.4845, GNorm = 0.3904, lr_0 = 1.4339e-04
Loss = 3.7664e-02, PNorm = 70.4860, GNorm = 0.4929, lr_0 = 1.4329e-04
Loss = 3.3369e-02, PNorm = 70.4874, GNorm = 0.4614, lr_0 = 1.4319e-04
Loss = 4.0148e-02, PNorm = 70.4888, GNorm = 0.4700, lr_0 = 1.4310e-04
Loss = 3.8570e-02, PNorm = 70.4910, GNorm = 0.5139, lr_0 = 1.4300e-04
Loss = 3.9907e-02, PNorm = 70.4933, GNorm = 0.5303, lr_0 = 1.4290e-04
Loss = 3.8441e-02, PNorm = 70.4951, GNorm = 0.4831, lr_0 = 1.4280e-04
Loss = 4.1562e-02, PNorm = 70.4967, GNorm = 0.6659, lr_0 = 1.4270e-04
Loss = 3.8600e-02, PNorm = 70.4982, GNorm = 0.4205, lr_0 = 1.4261e-04
Loss = 4.2034e-02, PNorm = 70.5002, GNorm = 0.5330, lr_0 = 1.4251e-04
Loss = 4.1453e-02, PNorm = 70.5014, GNorm = 0.5020, lr_0 = 1.4241e-04
Loss = 3.8352e-02, PNorm = 70.5042, GNorm = 0.4863, lr_0 = 1.4231e-04
Loss = 3.4912e-02, PNorm = 70.5061, GNorm = 0.4101, lr_0 = 1.4222e-04
Loss = 4.6887e-02, PNorm = 70.5076, GNorm = 0.5014, lr_0 = 1.4212e-04
Loss = 4.8178e-02, PNorm = 70.5091, GNorm = 0.4852, lr_0 = 1.4202e-04
Loss = 4.0671e-02, PNorm = 70.5116, GNorm = 0.6568, lr_0 = 1.4192e-04
Loss = 3.8248e-02, PNorm = 70.5130, GNorm = 0.5056, lr_0 = 1.4183e-04
Loss = 3.9268e-02, PNorm = 70.5142, GNorm = 0.5806, lr_0 = 1.4173e-04
Loss = 3.7149e-02, PNorm = 70.5160, GNorm = 0.5508, lr_0 = 1.4163e-04
Loss = 3.5142e-02, PNorm = 70.5176, GNorm = 0.4665, lr_0 = 1.4153e-04
Loss = 3.8463e-02, PNorm = 70.5200, GNorm = 0.4695, lr_0 = 1.4144e-04
Loss = 3.8469e-02, PNorm = 70.5209, GNorm = 0.5854, lr_0 = 1.4134e-04
Loss = 3.5526e-02, PNorm = 70.5206, GNorm = 0.4021, lr_0 = 1.4124e-04
Loss = 3.2717e-02, PNorm = 70.5219, GNorm = 0.4910, lr_0 = 1.4115e-04
Loss = 3.3337e-02, PNorm = 70.5231, GNorm = 0.4824, lr_0 = 1.4105e-04
Loss = 4.1235e-02, PNorm = 70.5231, GNorm = 0.6860, lr_0 = 1.4095e-04
Loss = 3.9073e-02, PNorm = 70.5251, GNorm = 0.6016, lr_0 = 1.4086e-04
Loss = 3.4444e-02, PNorm = 70.5280, GNorm = 0.7482, lr_0 = 1.4076e-04
Loss = 3.4885e-02, PNorm = 70.5310, GNorm = 0.4243, lr_0 = 1.4066e-04
Loss = 4.0717e-02, PNorm = 70.5330, GNorm = 0.6464, lr_0 = 1.4057e-04
Loss = 4.1807e-02, PNorm = 70.5356, GNorm = 0.5777, lr_0 = 1.4047e-04
Loss = 3.5432e-02, PNorm = 70.5367, GNorm = 0.6879, lr_0 = 1.4038e-04
Loss = 3.8666e-02, PNorm = 70.5384, GNorm = 0.3997, lr_0 = 1.4028e-04
Loss = 4.3949e-02, PNorm = 70.5402, GNorm = 0.3310, lr_0 = 1.4018e-04
Loss = 3.4856e-02, PNorm = 70.5420, GNorm = 0.4402, lr_0 = 1.4009e-04
Loss = 3.4102e-02, PNorm = 70.5444, GNorm = 0.4705, lr_0 = 1.3999e-04
Loss = 3.3683e-02, PNorm = 70.5459, GNorm = 0.5507, lr_0 = 1.3990e-04
Loss = 3.7471e-02, PNorm = 70.5481, GNorm = 0.4387, lr_0 = 1.3980e-04
Loss = 3.3010e-02, PNorm = 70.5503, GNorm = 0.4557, lr_0 = 1.3970e-04
Loss = 3.0652e-02, PNorm = 70.5526, GNorm = 0.6623, lr_0 = 1.3961e-04
Loss = 3.7076e-02, PNorm = 70.5549, GNorm = 0.4375, lr_0 = 1.3951e-04
Loss = 3.6560e-02, PNorm = 70.5573, GNorm = 0.4272, lr_0 = 1.3942e-04
Loss = 3.9279e-02, PNorm = 70.5599, GNorm = 0.5275, lr_0 = 1.3932e-04
Loss = 3.8816e-02, PNorm = 70.5611, GNorm = 0.5046, lr_0 = 1.3923e-04
Loss = 2.9208e-02, PNorm = 70.5619, GNorm = 0.4477, lr_0 = 1.3913e-04
Loss = 4.1805e-02, PNorm = 70.5644, GNorm = 0.4838, lr_0 = 1.3904e-04
Loss = 3.2286e-02, PNorm = 70.5669, GNorm = 0.5434, lr_0 = 1.3894e-04
Validation mae = 0.388587
Epoch 26
Loss = 3.6776e-02, PNorm = 70.5695, GNorm = 0.5718, lr_0 = 1.3884e-04
Loss = 3.2008e-02, PNorm = 70.5724, GNorm = 0.4782, lr_0 = 1.3875e-04
Loss = 2.7179e-02, PNorm = 70.5745, GNorm = 0.3689, lr_0 = 1.3865e-04
Loss = 3.3319e-02, PNorm = 70.5772, GNorm = 0.5224, lr_0 = 1.3856e-04
Loss = 3.2385e-02, PNorm = 70.5796, GNorm = 0.3853, lr_0 = 1.3846e-04
Loss = 3.1619e-02, PNorm = 70.5816, GNorm = 0.4584, lr_0 = 1.3837e-04
Loss = 2.9307e-02, PNorm = 70.5826, GNorm = 0.7070, lr_0 = 1.3828e-04
Loss = 3.4137e-02, PNorm = 70.5853, GNorm = 0.7122, lr_0 = 1.3818e-04
Loss = 3.3112e-02, PNorm = 70.5875, GNorm = 0.6668, lr_0 = 1.3809e-04
Loss = 3.5602e-02, PNorm = 70.5896, GNorm = 0.4289, lr_0 = 1.3799e-04
Loss = 3.4471e-02, PNorm = 70.5919, GNorm = 0.4915, lr_0 = 1.3790e-04
Loss = 3.6399e-02, PNorm = 70.5955, GNorm = 0.4041, lr_0 = 1.3780e-04
Loss = 2.6177e-02, PNorm = 70.5979, GNorm = 0.3725, lr_0 = 1.3771e-04
Loss = 3.3516e-02, PNorm = 70.5998, GNorm = 0.4304, lr_0 = 1.3761e-04
Loss = 3.6391e-02, PNorm = 70.6003, GNorm = 0.3830, lr_0 = 1.3752e-04
Loss = 3.5120e-02, PNorm = 70.6009, GNorm = 0.4608, lr_0 = 1.3742e-04
Loss = 3.6270e-02, PNorm = 70.6029, GNorm = 0.5136, lr_0 = 1.3733e-04
Loss = 3.6166e-02, PNorm = 70.6057, GNorm = 0.6762, lr_0 = 1.3724e-04
Loss = 3.6081e-02, PNorm = 70.6074, GNorm = 0.5235, lr_0 = 1.3714e-04
Loss = 4.1481e-02, PNorm = 70.6101, GNorm = 0.4939, lr_0 = 1.3705e-04
Loss = 2.9882e-02, PNorm = 70.6133, GNorm = 0.4033, lr_0 = 1.3695e-04
Loss = 3.4794e-02, PNorm = 70.6160, GNorm = 0.6093, lr_0 = 1.3686e-04
Loss = 3.6365e-02, PNorm = 70.6182, GNorm = 0.4142, lr_0 = 1.3677e-04
Loss = 3.1494e-02, PNorm = 70.6195, GNorm = 0.5503, lr_0 = 1.3667e-04
Loss = 3.3814e-02, PNorm = 70.6212, GNorm = 0.5981, lr_0 = 1.3658e-04
Loss = 3.0721e-02, PNorm = 70.6227, GNorm = 0.4422, lr_0 = 1.3649e-04
Loss = 3.5198e-02, PNorm = 70.6247, GNorm = 0.4100, lr_0 = 1.3639e-04
Loss = 3.1174e-02, PNorm = 70.6270, GNorm = 0.3897, lr_0 = 1.3630e-04
Loss = 3.0793e-02, PNorm = 70.6294, GNorm = 0.6071, lr_0 = 1.3621e-04
Loss = 3.4379e-02, PNorm = 70.6302, GNorm = 0.4104, lr_0 = 1.3611e-04
Loss = 2.8379e-02, PNorm = 70.6329, GNorm = 0.3490, lr_0 = 1.3602e-04
Loss = 3.4122e-02, PNorm = 70.6353, GNorm = 0.5650, lr_0 = 1.3593e-04
Loss = 3.4304e-02, PNorm = 70.6368, GNorm = 0.4301, lr_0 = 1.3583e-04
Loss = 3.8031e-02, PNorm = 70.6395, GNorm = 0.7243, lr_0 = 1.3574e-04
Loss = 3.1943e-02, PNorm = 70.6422, GNorm = 0.4424, lr_0 = 1.3565e-04
Loss = 3.2816e-02, PNorm = 70.6434, GNorm = 0.4589, lr_0 = 1.3555e-04
Loss = 3.5812e-02, PNorm = 70.6451, GNorm = 0.4526, lr_0 = 1.3546e-04
Loss = 3.2660e-02, PNorm = 70.6473, GNorm = 0.4683, lr_0 = 1.3537e-04
Loss = 3.2175e-02, PNorm = 70.6492, GNorm = 0.4737, lr_0 = 1.3528e-04
Loss = 3.7443e-02, PNorm = 70.6508, GNorm = 0.5589, lr_0 = 1.3518e-04
Loss = 3.8062e-02, PNorm = 70.6526, GNorm = 0.7013, lr_0 = 1.3509e-04
Loss = 3.4809e-02, PNorm = 70.6545, GNorm = 0.5252, lr_0 = 1.3500e-04
Loss = 3.3831e-02, PNorm = 70.6573, GNorm = 0.6023, lr_0 = 1.3491e-04
Loss = 4.1022e-02, PNorm = 70.6607, GNorm = 0.3732, lr_0 = 1.3481e-04
Loss = 3.1964e-02, PNorm = 70.6622, GNorm = 0.4507, lr_0 = 1.3472e-04
Loss = 4.1336e-02, PNorm = 70.6642, GNorm = 0.3921, lr_0 = 1.3463e-04
Loss = 2.9683e-02, PNorm = 70.6660, GNorm = 0.3696, lr_0 = 1.3454e-04
Loss = 3.2231e-02, PNorm = 70.6675, GNorm = 0.5047, lr_0 = 1.3444e-04
Loss = 4.1571e-02, PNorm = 70.6695, GNorm = 0.4359, lr_0 = 1.3435e-04
Loss = 3.2302e-02, PNorm = 70.6717, GNorm = 0.5226, lr_0 = 1.3426e-04
Loss = 2.8463e-02, PNorm = 70.6747, GNorm = 0.4513, lr_0 = 1.3417e-04
Loss = 3.1236e-02, PNorm = 70.6781, GNorm = 0.3677, lr_0 = 1.3408e-04
Loss = 3.3061e-02, PNorm = 70.6793, GNorm = 0.3811, lr_0 = 1.3398e-04
Loss = 2.9852e-02, PNorm = 70.6803, GNorm = 0.4853, lr_0 = 1.3389e-04
Loss = 3.8789e-02, PNorm = 70.6831, GNorm = 0.9877, lr_0 = 1.3380e-04
Loss = 3.7004e-02, PNorm = 70.6854, GNorm = 0.4317, lr_0 = 1.3371e-04
Loss = 3.5554e-02, PNorm = 70.6880, GNorm = 0.7501, lr_0 = 1.3362e-04
Loss = 3.7926e-02, PNorm = 70.6897, GNorm = 0.6044, lr_0 = 1.3353e-04
Loss = 3.9214e-02, PNorm = 70.6925, GNorm = 0.4290, lr_0 = 1.3343e-04
Loss = 3.2291e-02, PNorm = 70.6941, GNorm = 0.4611, lr_0 = 1.3334e-04
Loss = 3.7649e-02, PNorm = 70.6967, GNorm = 0.4300, lr_0 = 1.3325e-04
Loss = 3.2874e-02, PNorm = 70.6977, GNorm = 0.3619, lr_0 = 1.3316e-04
Loss = 3.1504e-02, PNorm = 70.6985, GNorm = 0.4068, lr_0 = 1.3307e-04
Loss = 3.1332e-02, PNorm = 70.6990, GNorm = 0.4912, lr_0 = 1.3298e-04
Loss = 3.5174e-02, PNorm = 70.7004, GNorm = 0.6983, lr_0 = 1.3289e-04
Loss = 3.9432e-02, PNorm = 70.7029, GNorm = 0.4449, lr_0 = 1.3280e-04
Loss = 3.7157e-02, PNorm = 70.7041, GNorm = 0.5018, lr_0 = 1.3270e-04
Loss = 3.9453e-02, PNorm = 70.7056, GNorm = 0.5770, lr_0 = 1.3261e-04
Loss = 4.0220e-02, PNorm = 70.7067, GNorm = 0.4064, lr_0 = 1.3252e-04
Loss = 4.0318e-02, PNorm = 70.7087, GNorm = 0.5622, lr_0 = 1.3243e-04
Loss = 3.5032e-02, PNorm = 70.7105, GNorm = 0.5490, lr_0 = 1.3234e-04
Loss = 3.0429e-02, PNorm = 70.7116, GNorm = 0.3863, lr_0 = 1.3225e-04
Loss = 3.5326e-02, PNorm = 70.7130, GNorm = 0.5221, lr_0 = 1.3216e-04
Loss = 4.3073e-02, PNorm = 70.7140, GNorm = 1.0257, lr_0 = 1.3207e-04
Loss = 3.8731e-02, PNorm = 70.7167, GNorm = 0.6834, lr_0 = 1.3198e-04
Loss = 3.3694e-02, PNorm = 70.7200, GNorm = 0.3739, lr_0 = 1.3189e-04
Loss = 3.2786e-02, PNorm = 70.7224, GNorm = 0.5801, lr_0 = 1.3180e-04
Loss = 3.2067e-02, PNorm = 70.7244, GNorm = 0.5416, lr_0 = 1.3171e-04
Loss = 3.4453e-02, PNorm = 70.7272, GNorm = 0.4690, lr_0 = 1.3162e-04
Loss = 3.0673e-02, PNorm = 70.7299, GNorm = 0.7027, lr_0 = 1.3153e-04
Loss = 3.4374e-02, PNorm = 70.7315, GNorm = 0.3928, lr_0 = 1.3144e-04
Loss = 3.5635e-02, PNorm = 70.7322, GNorm = 0.4678, lr_0 = 1.3135e-04
Loss = 3.3882e-02, PNorm = 70.7337, GNorm = 0.3744, lr_0 = 1.3126e-04
Loss = 3.5574e-02, PNorm = 70.7339, GNorm = 0.4759, lr_0 = 1.3117e-04
Loss = 3.8082e-02, PNorm = 70.7361, GNorm = 0.5365, lr_0 = 1.3108e-04
Loss = 4.2159e-02, PNorm = 70.7383, GNorm = 0.4408, lr_0 = 1.3099e-04
Loss = 3.3482e-02, PNorm = 70.7405, GNorm = 0.4517, lr_0 = 1.3090e-04
Loss = 3.7336e-02, PNorm = 70.7416, GNorm = 0.7875, lr_0 = 1.3081e-04
Loss = 3.6885e-02, PNorm = 70.7418, GNorm = 0.6442, lr_0 = 1.3072e-04
Loss = 3.2325e-02, PNorm = 70.7435, GNorm = 0.4890, lr_0 = 1.3063e-04
Loss = 3.4212e-02, PNorm = 70.7448, GNorm = 0.4614, lr_0 = 1.3054e-04
Loss = 3.6212e-02, PNorm = 70.7459, GNorm = 0.3680, lr_0 = 1.3045e-04
Loss = 3.6785e-02, PNorm = 70.7473, GNorm = 0.5746, lr_0 = 1.3036e-04
Loss = 3.1581e-02, PNorm = 70.7490, GNorm = 0.3723, lr_0 = 1.3027e-04
Loss = 3.7670e-02, PNorm = 70.7505, GNorm = 0.6477, lr_0 = 1.3018e-04
Loss = 3.3399e-02, PNorm = 70.7528, GNorm = 0.5159, lr_0 = 1.3009e-04
Loss = 3.5466e-02, PNorm = 70.7557, GNorm = 0.5147, lr_0 = 1.3000e-04
Loss = 3.2836e-02, PNorm = 70.7578, GNorm = 0.4956, lr_0 = 1.2992e-04
Loss = 3.1123e-02, PNorm = 70.7596, GNorm = 0.4581, lr_0 = 1.2983e-04
Loss = 3.7063e-02, PNorm = 70.7611, GNorm = 0.6227, lr_0 = 1.2974e-04
Loss = 3.9775e-02, PNorm = 70.7632, GNorm = 0.5031, lr_0 = 1.2965e-04
Loss = 3.9968e-02, PNorm = 70.7653, GNorm = 0.4898, lr_0 = 1.2956e-04
Loss = 3.3682e-02, PNorm = 70.7671, GNorm = 0.5185, lr_0 = 1.2947e-04
Loss = 3.0660e-02, PNorm = 70.7700, GNorm = 0.4137, lr_0 = 1.2938e-04
Loss = 3.4689e-02, PNorm = 70.7712, GNorm = 0.4876, lr_0 = 1.2929e-04
Loss = 3.5653e-02, PNorm = 70.7723, GNorm = 0.6003, lr_0 = 1.2921e-04
Loss = 3.3323e-02, PNorm = 70.7727, GNorm = 0.2979, lr_0 = 1.2912e-04
Loss = 3.7358e-02, PNorm = 70.7741, GNorm = 0.5215, lr_0 = 1.2903e-04
Loss = 3.3705e-02, PNorm = 70.7763, GNorm = 0.3965, lr_0 = 1.2894e-04
Loss = 4.0305e-02, PNorm = 70.7783, GNorm = 0.4066, lr_0 = 1.2885e-04
Loss = 3.1987e-02, PNorm = 70.7797, GNorm = 0.4538, lr_0 = 1.2876e-04
Loss = 4.1555e-02, PNorm = 70.7820, GNorm = 0.4272, lr_0 = 1.2867e-04
Loss = 3.0351e-02, PNorm = 70.7832, GNorm = 0.4807, lr_0 = 1.2859e-04
Loss = 3.4818e-02, PNorm = 70.7844, GNorm = 0.4962, lr_0 = 1.2850e-04
Loss = 3.5095e-02, PNorm = 70.7857, GNorm = 0.3578, lr_0 = 1.2841e-04
Loss = 2.9186e-02, PNorm = 70.7864, GNorm = 0.5689, lr_0 = 1.2832e-04
Loss = 4.5264e-02, PNorm = 70.7881, GNorm = 0.6326, lr_0 = 1.2823e-04
Loss = 3.7177e-02, PNorm = 70.7893, GNorm = 0.5041, lr_0 = 1.2815e-04
Loss = 3.3991e-02, PNorm = 70.7900, GNorm = 0.4006, lr_0 = 1.2806e-04
Loss = 3.2754e-02, PNorm = 70.7910, GNorm = 0.3427, lr_0 = 1.2797e-04
Validation mae = 0.387986
Epoch 27
Loss = 2.9002e-02, PNorm = 70.7931, GNorm = 0.6155, lr_0 = 1.2788e-04
Loss = 3.0455e-02, PNorm = 70.7959, GNorm = 0.3271, lr_0 = 1.2780e-04
Loss = 3.4438e-02, PNorm = 70.7982, GNorm = 0.5622, lr_0 = 1.2771e-04
Loss = 2.7177e-02, PNorm = 70.8009, GNorm = 0.4632, lr_0 = 1.2762e-04
Loss = 3.1241e-02, PNorm = 70.8050, GNorm = 0.3396, lr_0 = 1.2753e-04
Loss = 3.1674e-02, PNorm = 70.8072, GNorm = 0.6292, lr_0 = 1.2745e-04
Loss = 2.8513e-02, PNorm = 70.8088, GNorm = 0.3966, lr_0 = 1.2736e-04
Loss = 3.2599e-02, PNorm = 70.8099, GNorm = 0.6608, lr_0 = 1.2727e-04
Loss = 3.2182e-02, PNorm = 70.8120, GNorm = 0.5592, lr_0 = 1.2718e-04
Loss = 3.2041e-02, PNorm = 70.8142, GNorm = 0.4408, lr_0 = 1.2710e-04
Loss = 2.9134e-02, PNorm = 70.8160, GNorm = 0.4805, lr_0 = 1.2701e-04
Loss = 2.9457e-02, PNorm = 70.8180, GNorm = 0.3757, lr_0 = 1.2692e-04
Loss = 3.3300e-02, PNorm = 70.8202, GNorm = 0.3703, lr_0 = 1.2684e-04
Loss = 3.2067e-02, PNorm = 70.8215, GNorm = 0.6109, lr_0 = 1.2675e-04
Loss = 3.3259e-02, PNorm = 70.8231, GNorm = 0.7141, lr_0 = 1.2666e-04
Loss = 3.5175e-02, PNorm = 70.8240, GNorm = 0.6442, lr_0 = 1.2658e-04
Loss = 3.8016e-02, PNorm = 70.8258, GNorm = 0.4693, lr_0 = 1.2649e-04
Loss = 3.1686e-02, PNorm = 70.8278, GNorm = 0.5103, lr_0 = 1.2640e-04
Loss = 3.1826e-02, PNorm = 70.8294, GNorm = 0.5612, lr_0 = 1.2632e-04
Loss = 3.5033e-02, PNorm = 70.8308, GNorm = 0.5010, lr_0 = 1.2623e-04
Loss = 3.3142e-02, PNorm = 70.8337, GNorm = 0.4137, lr_0 = 1.2614e-04
Loss = 3.3337e-02, PNorm = 70.8358, GNorm = 0.4387, lr_0 = 1.2606e-04
Loss = 3.3336e-02, PNorm = 70.8372, GNorm = 0.3682, lr_0 = 1.2597e-04
Loss = 2.9940e-02, PNorm = 70.8392, GNorm = 0.3403, lr_0 = 1.2588e-04
Loss = 3.4964e-02, PNorm = 70.8414, GNorm = 0.4436, lr_0 = 1.2580e-04
Loss = 3.4623e-02, PNorm = 70.8426, GNorm = 0.4824, lr_0 = 1.2571e-04
Loss = 3.0651e-02, PNorm = 70.8435, GNorm = 0.4498, lr_0 = 1.2563e-04
Loss = 2.5844e-02, PNorm = 70.8447, GNorm = 0.3139, lr_0 = 1.2554e-04
Loss = 2.9846e-02, PNorm = 70.8459, GNorm = 0.4061, lr_0 = 1.2545e-04
Loss = 2.8037e-02, PNorm = 70.8472, GNorm = 0.4530, lr_0 = 1.2537e-04
Loss = 2.7304e-02, PNorm = 70.8488, GNorm = 0.3407, lr_0 = 1.2528e-04
Loss = 2.8643e-02, PNorm = 70.8510, GNorm = 0.5299, lr_0 = 1.2520e-04
Loss = 3.3458e-02, PNorm = 70.8533, GNorm = 0.5247, lr_0 = 1.2511e-04
Loss = 3.5590e-02, PNorm = 70.8543, GNorm = 0.4733, lr_0 = 1.2502e-04
Loss = 3.4241e-02, PNorm = 70.8568, GNorm = 0.6061, lr_0 = 1.2494e-04
Loss = 3.7991e-02, PNorm = 70.8584, GNorm = 0.8574, lr_0 = 1.2485e-04
Loss = 3.3952e-02, PNorm = 70.8607, GNorm = 0.5525, lr_0 = 1.2477e-04
Loss = 3.1359e-02, PNorm = 70.8623, GNorm = 0.5224, lr_0 = 1.2468e-04
Loss = 3.2685e-02, PNorm = 70.8634, GNorm = 0.4476, lr_0 = 1.2460e-04
Loss = 2.9992e-02, PNorm = 70.8634, GNorm = 0.5139, lr_0 = 1.2451e-04
Loss = 3.5620e-02, PNorm = 70.8638, GNorm = 0.4102, lr_0 = 1.2443e-04
Loss = 3.3954e-02, PNorm = 70.8650, GNorm = 0.5173, lr_0 = 1.2434e-04
Loss = 2.7915e-02, PNorm = 70.8659, GNorm = 0.3949, lr_0 = 1.2426e-04
Loss = 3.1191e-02, PNorm = 70.8675, GNorm = 0.6987, lr_0 = 1.2417e-04
Loss = 3.1239e-02, PNorm = 70.8703, GNorm = 0.4469, lr_0 = 1.2409e-04
Loss = 3.3223e-02, PNorm = 70.8720, GNorm = 0.4094, lr_0 = 1.2400e-04
Loss = 3.0208e-02, PNorm = 70.8733, GNorm = 0.4002, lr_0 = 1.2392e-04
Loss = 3.0583e-02, PNorm = 70.8752, GNorm = 0.7359, lr_0 = 1.2383e-04
Loss = 3.4897e-02, PNorm = 70.8766, GNorm = 0.5429, lr_0 = 1.2375e-04
Loss = 2.9944e-02, PNorm = 70.8772, GNorm = 0.4939, lr_0 = 1.2366e-04
Loss = 3.2592e-02, PNorm = 70.8779, GNorm = 0.3944, lr_0 = 1.2358e-04
Loss = 3.0943e-02, PNorm = 70.8807, GNorm = 0.4275, lr_0 = 1.2349e-04
Loss = 3.4288e-02, PNorm = 70.8840, GNorm = 0.5952, lr_0 = 1.2341e-04
Loss = 3.0199e-02, PNorm = 70.8870, GNorm = 0.3580, lr_0 = 1.2332e-04
Loss = 3.5670e-02, PNorm = 70.8887, GNorm = 0.5186, lr_0 = 1.2324e-04
Loss = 3.5477e-02, PNorm = 70.8898, GNorm = 0.4353, lr_0 = 1.2315e-04
Loss = 3.2199e-02, PNorm = 70.8920, GNorm = 0.3951, lr_0 = 1.2307e-04
Loss = 3.5233e-02, PNorm = 70.8930, GNorm = 0.5414, lr_0 = 1.2298e-04
Loss = 3.3527e-02, PNorm = 70.8935, GNorm = 0.6139, lr_0 = 1.2290e-04
Loss = 3.4290e-02, PNorm = 70.8950, GNorm = 0.4993, lr_0 = 1.2282e-04
Loss = 3.2764e-02, PNorm = 70.8968, GNorm = 0.3892, lr_0 = 1.2273e-04
Loss = 3.1884e-02, PNorm = 70.8989, GNorm = 0.4968, lr_0 = 1.2265e-04
Loss = 3.4588e-02, PNorm = 70.9001, GNorm = 0.4859, lr_0 = 1.2256e-04
Loss = 3.3263e-02, PNorm = 70.9026, GNorm = 0.3864, lr_0 = 1.2248e-04
Loss = 2.7693e-02, PNorm = 70.9038, GNorm = 0.4727, lr_0 = 1.2240e-04
Loss = 3.3533e-02, PNorm = 70.9057, GNorm = 0.4928, lr_0 = 1.2231e-04
Loss = 3.3602e-02, PNorm = 70.9082, GNorm = 0.3788, lr_0 = 1.2223e-04
Loss = 3.4643e-02, PNorm = 70.9099, GNorm = 0.4553, lr_0 = 1.2214e-04
Loss = 3.3951e-02, PNorm = 70.9113, GNorm = 0.3904, lr_0 = 1.2206e-04
Loss = 4.2464e-02, PNorm = 70.9122, GNorm = 0.5376, lr_0 = 1.2198e-04
Loss = 3.3735e-02, PNorm = 70.9128, GNorm = 0.6259, lr_0 = 1.2189e-04
Loss = 3.0920e-02, PNorm = 70.9138, GNorm = 0.5368, lr_0 = 1.2181e-04
Loss = 3.2906e-02, PNorm = 70.9152, GNorm = 0.5853, lr_0 = 1.2173e-04
Loss = 3.5100e-02, PNorm = 70.9175, GNorm = 0.4929, lr_0 = 1.2164e-04
Loss = 3.1237e-02, PNorm = 70.9201, GNorm = 0.4565, lr_0 = 1.2156e-04
Loss = 4.2172e-02, PNorm = 70.9219, GNorm = 0.5321, lr_0 = 1.2148e-04
Loss = 3.2225e-02, PNorm = 70.9240, GNorm = 0.5302, lr_0 = 1.2139e-04
Loss = 3.8152e-02, PNorm = 70.9258, GNorm = 0.4182, lr_0 = 1.2131e-04
Loss = 2.9453e-02, PNorm = 70.9279, GNorm = 0.4504, lr_0 = 1.2123e-04
Loss = 2.7891e-02, PNorm = 70.9300, GNorm = 0.5400, lr_0 = 1.2114e-04
Loss = 3.7196e-02, PNorm = 70.9324, GNorm = 0.5523, lr_0 = 1.2106e-04
Loss = 3.6082e-02, PNorm = 70.9363, GNorm = 0.5039, lr_0 = 1.2098e-04
Loss = 3.0637e-02, PNorm = 70.9390, GNorm = 0.6618, lr_0 = 1.2090e-04
Loss = 3.1497e-02, PNorm = 70.9404, GNorm = 0.3279, lr_0 = 1.2081e-04
Loss = 3.3050e-02, PNorm = 70.9413, GNorm = 0.4948, lr_0 = 1.2073e-04
Loss = 3.5968e-02, PNorm = 70.9420, GNorm = 0.5969, lr_0 = 1.2065e-04
Loss = 4.0108e-02, PNorm = 70.9437, GNorm = 0.4349, lr_0 = 1.2056e-04
Loss = 3.3913e-02, PNorm = 70.9447, GNorm = 0.4312, lr_0 = 1.2048e-04
Loss = 3.3287e-02, PNorm = 70.9458, GNorm = 0.4853, lr_0 = 1.2040e-04
Loss = 3.2729e-02, PNorm = 70.9466, GNorm = 0.4845, lr_0 = 1.2032e-04
Loss = 3.6553e-02, PNorm = 70.9489, GNorm = 0.6341, lr_0 = 1.2023e-04
Loss = 4.0613e-02, PNorm = 70.9513, GNorm = 0.4919, lr_0 = 1.2015e-04
Loss = 3.1510e-02, PNorm = 70.9528, GNorm = 0.4075, lr_0 = 1.2007e-04
Loss = 3.4095e-02, PNorm = 70.9545, GNorm = 0.4261, lr_0 = 1.1999e-04
Loss = 3.4383e-02, PNorm = 70.9562, GNorm = 0.4906, lr_0 = 1.1991e-04
Loss = 3.3658e-02, PNorm = 70.9577, GNorm = 0.5544, lr_0 = 1.1982e-04
Loss = 3.2967e-02, PNorm = 70.9593, GNorm = 0.4630, lr_0 = 1.1974e-04
Loss = 3.3800e-02, PNorm = 70.9613, GNorm = 0.4744, lr_0 = 1.1966e-04
Loss = 3.7322e-02, PNorm = 70.9626, GNorm = 0.4685, lr_0 = 1.1958e-04
Loss = 3.5183e-02, PNorm = 70.9641, GNorm = 0.6038, lr_0 = 1.1950e-04
Loss = 3.9569e-02, PNorm = 70.9649, GNorm = 0.4183, lr_0 = 1.1941e-04
Loss = 3.6956e-02, PNorm = 70.9666, GNorm = 0.6167, lr_0 = 1.1933e-04
Loss = 2.8950e-02, PNorm = 70.9676, GNorm = 0.3207, lr_0 = 1.1925e-04
Loss = 3.9779e-02, PNorm = 70.9692, GNorm = 0.6566, lr_0 = 1.1917e-04
Loss = 3.2333e-02, PNorm = 70.9727, GNorm = 0.6635, lr_0 = 1.1909e-04
Loss = 3.1046e-02, PNorm = 70.9749, GNorm = 0.4803, lr_0 = 1.1901e-04
Loss = 3.6281e-02, PNorm = 70.9760, GNorm = 0.4972, lr_0 = 1.1892e-04
Loss = 3.6727e-02, PNorm = 70.9775, GNorm = 0.3750, lr_0 = 1.1884e-04
Loss = 3.2658e-02, PNorm = 70.9795, GNorm = 0.4451, lr_0 = 1.1876e-04
Loss = 3.0469e-02, PNorm = 70.9815, GNorm = 0.4834, lr_0 = 1.1868e-04
Loss = 3.6405e-02, PNorm = 70.9828, GNorm = 0.4379, lr_0 = 1.1860e-04
Loss = 3.1697e-02, PNorm = 70.9842, GNorm = 0.4664, lr_0 = 1.1852e-04
Loss = 3.4666e-02, PNorm = 70.9859, GNorm = 0.5609, lr_0 = 1.1844e-04
Loss = 3.3869e-02, PNorm = 70.9870, GNorm = 0.9094, lr_0 = 1.1835e-04
Loss = 3.3794e-02, PNorm = 70.9891, GNorm = 0.6156, lr_0 = 1.1827e-04
Loss = 3.3090e-02, PNorm = 70.9908, GNorm = 0.6499, lr_0 = 1.1819e-04
Loss = 3.2292e-02, PNorm = 70.9926, GNorm = 0.4627, lr_0 = 1.1811e-04
Loss = 3.5015e-02, PNorm = 70.9936, GNorm = 0.3570, lr_0 = 1.1803e-04
Loss = 3.2374e-02, PNorm = 70.9950, GNorm = 0.3950, lr_0 = 1.1795e-04
Loss = 3.7065e-02, PNorm = 70.9966, GNorm = 0.5162, lr_0 = 1.1787e-04
Validation mae = 0.391567
Epoch 28
Loss = 2.6914e-02, PNorm = 70.9971, GNorm = 0.3354, lr_0 = 1.1779e-04
Loss = 3.4696e-02, PNorm = 70.9982, GNorm = 0.4368, lr_0 = 1.1771e-04
Loss = 2.7313e-02, PNorm = 70.9999, GNorm = 0.4416, lr_0 = 1.1763e-04
Loss = 3.2486e-02, PNorm = 71.0018, GNorm = 0.6633, lr_0 = 1.1755e-04
Loss = 3.1639e-02, PNorm = 71.0040, GNorm = 0.4918, lr_0 = 1.1747e-04
Loss = 2.8578e-02, PNorm = 71.0061, GNorm = 0.4808, lr_0 = 1.1739e-04
Loss = 3.1314e-02, PNorm = 71.0077, GNorm = 0.6641, lr_0 = 1.1730e-04
Loss = 2.6156e-02, PNorm = 71.0089, GNorm = 0.3668, lr_0 = 1.1722e-04
Loss = 3.4833e-02, PNorm = 71.0103, GNorm = 0.6409, lr_0 = 1.1714e-04
Loss = 3.0839e-02, PNorm = 71.0117, GNorm = 0.4694, lr_0 = 1.1706e-04
Loss = 2.8550e-02, PNorm = 71.0131, GNorm = 0.5249, lr_0 = 1.1698e-04
Loss = 2.3620e-02, PNorm = 71.0142, GNorm = 0.3350, lr_0 = 1.1690e-04
Loss = 3.0329e-02, PNorm = 71.0155, GNorm = 0.5897, lr_0 = 1.1682e-04
Loss = 3.3851e-02, PNorm = 71.0173, GNorm = 0.5708, lr_0 = 1.1674e-04
Loss = 3.0572e-02, PNorm = 71.0190, GNorm = 0.4974, lr_0 = 1.1666e-04
Loss = 3.0721e-02, PNorm = 71.0212, GNorm = 0.4162, lr_0 = 1.1658e-04
Loss = 2.8347e-02, PNorm = 71.0233, GNorm = 0.4461, lr_0 = 1.1650e-04
Loss = 2.7787e-02, PNorm = 71.0245, GNorm = 0.4200, lr_0 = 1.1642e-04
Loss = 2.7886e-02, PNorm = 71.0256, GNorm = 0.4954, lr_0 = 1.1634e-04
Loss = 2.9226e-02, PNorm = 71.0263, GNorm = 0.3624, lr_0 = 1.1626e-04
Loss = 2.8603e-02, PNorm = 71.0274, GNorm = 0.5153, lr_0 = 1.1618e-04
Loss = 3.0387e-02, PNorm = 71.0278, GNorm = 0.4737, lr_0 = 1.1611e-04
Loss = 2.7299e-02, PNorm = 71.0291, GNorm = 0.6015, lr_0 = 1.1603e-04
Loss = 2.9488e-02, PNorm = 71.0309, GNorm = 0.4731, lr_0 = 1.1595e-04
Loss = 2.9625e-02, PNorm = 71.0328, GNorm = 0.4775, lr_0 = 1.1587e-04
Loss = 2.5066e-02, PNorm = 71.0344, GNorm = 0.3602, lr_0 = 1.1579e-04
Loss = 3.4762e-02, PNorm = 71.0356, GNorm = 0.4559, lr_0 = 1.1571e-04
Loss = 3.5457e-02, PNorm = 71.0381, GNorm = 0.5273, lr_0 = 1.1563e-04
Loss = 3.6415e-02, PNorm = 71.0410, GNorm = 0.5355, lr_0 = 1.1555e-04
Loss = 3.5657e-02, PNorm = 71.0431, GNorm = 0.4659, lr_0 = 1.1547e-04
Loss = 3.3070e-02, PNorm = 71.0447, GNorm = 0.4614, lr_0 = 1.1539e-04
Loss = 2.8821e-02, PNorm = 71.0451, GNorm = 0.4272, lr_0 = 1.1531e-04
Loss = 3.3585e-02, PNorm = 71.0464, GNorm = 0.6428, lr_0 = 1.1523e-04
Loss = 3.2451e-02, PNorm = 71.0489, GNorm = 0.4730, lr_0 = 1.1515e-04
Loss = 2.6873e-02, PNorm = 71.0509, GNorm = 0.4762, lr_0 = 1.1508e-04
Loss = 3.1590e-02, PNorm = 71.0522, GNorm = 0.4551, lr_0 = 1.1500e-04
Loss = 3.1664e-02, PNorm = 71.0529, GNorm = 0.4057, lr_0 = 1.1492e-04
Loss = 3.5026e-02, PNorm = 71.0543, GNorm = 0.4071, lr_0 = 1.1484e-04
Loss = 2.8496e-02, PNorm = 71.0564, GNorm = 0.3944, lr_0 = 1.1476e-04
Loss = 2.8661e-02, PNorm = 71.0587, GNorm = 0.4071, lr_0 = 1.1468e-04
Loss = 3.6548e-02, PNorm = 71.0595, GNorm = 0.5231, lr_0 = 1.1460e-04
Loss = 2.7813e-02, PNorm = 71.0608, GNorm = 0.4582, lr_0 = 1.1452e-04
Loss = 2.9157e-02, PNorm = 71.0625, GNorm = 0.4217, lr_0 = 1.1445e-04
Loss = 2.7992e-02, PNorm = 71.0641, GNorm = 0.3644, lr_0 = 1.1437e-04
Loss = 3.2154e-02, PNorm = 71.0662, GNorm = 0.5443, lr_0 = 1.1429e-04
Loss = 3.2338e-02, PNorm = 71.0678, GNorm = 0.4845, lr_0 = 1.1421e-04
Loss = 2.9699e-02, PNorm = 71.0687, GNorm = 0.5210, lr_0 = 1.1413e-04
Loss = 3.2416e-02, PNorm = 71.0693, GNorm = 0.5821, lr_0 = 1.1405e-04
Loss = 3.3896e-02, PNorm = 71.0707, GNorm = 0.4829, lr_0 = 1.1398e-04
Loss = 2.9646e-02, PNorm = 71.0726, GNorm = 0.4535, lr_0 = 1.1390e-04
Loss = 4.0373e-02, PNorm = 71.0743, GNorm = 0.7689, lr_0 = 1.1382e-04
Loss = 3.2507e-02, PNorm = 71.0754, GNorm = 0.5318, lr_0 = 1.1374e-04
Loss = 2.7632e-02, PNorm = 71.0765, GNorm = 0.3489, lr_0 = 1.1366e-04
Loss = 3.0892e-02, PNorm = 71.0772, GNorm = 0.3737, lr_0 = 1.1359e-04
Loss = 3.1461e-02, PNorm = 71.0785, GNorm = 0.5841, lr_0 = 1.1351e-04
Loss = 2.9851e-02, PNorm = 71.0798, GNorm = 0.4643, lr_0 = 1.1343e-04
Loss = 3.3177e-02, PNorm = 71.0817, GNorm = 0.5193, lr_0 = 1.1335e-04
Loss = 3.0247e-02, PNorm = 71.0842, GNorm = 0.4381, lr_0 = 1.1328e-04
Loss = 3.7085e-02, PNorm = 71.0861, GNorm = 0.7838, lr_0 = 1.1320e-04
Loss = 2.8884e-02, PNorm = 71.0881, GNorm = 0.4111, lr_0 = 1.1312e-04
Loss = 2.8303e-02, PNorm = 71.0892, GNorm = 0.3705, lr_0 = 1.1304e-04
Loss = 2.9629e-02, PNorm = 71.0906, GNorm = 0.4735, lr_0 = 1.1297e-04
Loss = 3.0537e-02, PNorm = 71.0927, GNorm = 0.4330, lr_0 = 1.1289e-04
Loss = 3.7351e-02, PNorm = 71.0937, GNorm = 0.5238, lr_0 = 1.1281e-04
Loss = 3.4126e-02, PNorm = 71.0941, GNorm = 0.5811, lr_0 = 1.1273e-04
Loss = 3.0120e-02, PNorm = 71.0953, GNorm = 0.4437, lr_0 = 1.1266e-04
Loss = 2.8883e-02, PNorm = 71.0966, GNorm = 0.3736, lr_0 = 1.1258e-04
Loss = 3.8628e-02, PNorm = 71.0981, GNorm = 0.3772, lr_0 = 1.1250e-04
Loss = 2.8853e-02, PNorm = 71.0995, GNorm = 0.3378, lr_0 = 1.1243e-04
Loss = 2.8611e-02, PNorm = 71.1011, GNorm = 0.3870, lr_0 = 1.1235e-04
Loss = 2.8319e-02, PNorm = 71.1018, GNorm = 0.4807, lr_0 = 1.1227e-04
Loss = 3.0444e-02, PNorm = 71.1038, GNorm = 0.5974, lr_0 = 1.1219e-04
Loss = 3.1559e-02, PNorm = 71.1054, GNorm = 0.4371, lr_0 = 1.1212e-04
Loss = 3.1039e-02, PNorm = 71.1065, GNorm = 0.4909, lr_0 = 1.1204e-04
Loss = 3.4792e-02, PNorm = 71.1084, GNorm = 0.5703, lr_0 = 1.1196e-04
Loss = 3.0097e-02, PNorm = 71.1105, GNorm = 0.5408, lr_0 = 1.1189e-04
Loss = 3.4892e-02, PNorm = 71.1114, GNorm = 0.5543, lr_0 = 1.1181e-04
Loss = 3.5414e-02, PNorm = 71.1130, GNorm = 0.8267, lr_0 = 1.1173e-04
Loss = 3.1096e-02, PNorm = 71.1147, GNorm = 0.5350, lr_0 = 1.1166e-04
Loss = 3.1732e-02, PNorm = 71.1160, GNorm = 0.4882, lr_0 = 1.1158e-04
Loss = 3.2693e-02, PNorm = 71.1172, GNorm = 0.4597, lr_0 = 1.1150e-04
Loss = 3.1332e-02, PNorm = 71.1178, GNorm = 0.4345, lr_0 = 1.1143e-04
Loss = 3.4597e-02, PNorm = 71.1186, GNorm = 0.5827, lr_0 = 1.1135e-04
Loss = 3.8582e-02, PNorm = 71.1212, GNorm = 0.5389, lr_0 = 1.1128e-04
Loss = 3.8483e-02, PNorm = 71.1231, GNorm = 0.4254, lr_0 = 1.1120e-04
Loss = 3.0891e-02, PNorm = 71.1248, GNorm = 0.3637, lr_0 = 1.1112e-04
Loss = 3.7271e-02, PNorm = 71.1259, GNorm = 0.7402, lr_0 = 1.1105e-04
Loss = 3.0746e-02, PNorm = 71.1270, GNorm = 0.4185, lr_0 = 1.1097e-04
Loss = 3.0243e-02, PNorm = 71.1285, GNorm = 0.5417, lr_0 = 1.1089e-04
Loss = 3.4061e-02, PNorm = 71.1302, GNorm = 0.4693, lr_0 = 1.1082e-04
Loss = 3.0733e-02, PNorm = 71.1316, GNorm = 0.4117, lr_0 = 1.1074e-04
Loss = 3.2810e-02, PNorm = 71.1331, GNorm = 0.4358, lr_0 = 1.1067e-04
Loss = 3.2524e-02, PNorm = 71.1343, GNorm = 0.5609, lr_0 = 1.1059e-04
Loss = 2.9221e-02, PNorm = 71.1361, GNorm = 0.3491, lr_0 = 1.1052e-04
Loss = 2.9608e-02, PNorm = 71.1377, GNorm = 0.4316, lr_0 = 1.1044e-04
Loss = 2.9644e-02, PNorm = 71.1391, GNorm = 0.4204, lr_0 = 1.1036e-04
Loss = 2.9825e-02, PNorm = 71.1406, GNorm = 0.4863, lr_0 = 1.1029e-04
Loss = 3.5871e-02, PNorm = 71.1421, GNorm = 0.4639, lr_0 = 1.1021e-04
Loss = 3.0802e-02, PNorm = 71.1446, GNorm = 0.4965, lr_0 = 1.1014e-04
Loss = 3.7085e-02, PNorm = 71.1456, GNorm = 0.5048, lr_0 = 1.1006e-04
Loss = 3.2367e-02, PNorm = 71.1463, GNorm = 0.6496, lr_0 = 1.0999e-04
Loss = 3.4556e-02, PNorm = 71.1470, GNorm = 0.4545, lr_0 = 1.0991e-04
Loss = 3.4447e-02, PNorm = 71.1475, GNorm = 0.3968, lr_0 = 1.0984e-04
Loss = 3.5135e-02, PNorm = 71.1478, GNorm = 0.5041, lr_0 = 1.0976e-04
Loss = 2.6474e-02, PNorm = 71.1498, GNorm = 0.3407, lr_0 = 1.0969e-04
Loss = 3.1469e-02, PNorm = 71.1512, GNorm = 0.4004, lr_0 = 1.0961e-04
Loss = 3.3548e-02, PNorm = 71.1537, GNorm = 0.6297, lr_0 = 1.0954e-04
Loss = 3.1720e-02, PNorm = 71.1556, GNorm = 0.4703, lr_0 = 1.0946e-04
Loss = 3.1109e-02, PNorm = 71.1568, GNorm = 0.6058, lr_0 = 1.0939e-04
Loss = 3.4936e-02, PNorm = 71.1575, GNorm = 0.4755, lr_0 = 1.0931e-04
Loss = 3.4528e-02, PNorm = 71.1584, GNorm = 0.6034, lr_0 = 1.0924e-04
Loss = 3.8129e-02, PNorm = 71.1600, GNorm = 0.4580, lr_0 = 1.0916e-04
Loss = 4.3701e-02, PNorm = 71.1621, GNorm = 0.5565, lr_0 = 1.0909e-04
Loss = 3.4602e-02, PNorm = 71.1643, GNorm = 0.5235, lr_0 = 1.0901e-04
Loss = 3.2181e-02, PNorm = 71.1670, GNorm = 0.5042, lr_0 = 1.0894e-04
Loss = 3.3035e-02, PNorm = 71.1695, GNorm = 0.4602, lr_0 = 1.0886e-04
Loss = 3.8197e-02, PNorm = 71.1709, GNorm = 0.6914, lr_0 = 1.0879e-04
Loss = 3.0187e-02, PNorm = 71.1724, GNorm = 0.4911, lr_0 = 1.0871e-04
Loss = 3.1185e-02, PNorm = 71.1741, GNorm = 0.4272, lr_0 = 1.0864e-04
Loss = 3.1004e-02, PNorm = 71.1751, GNorm = 0.4720, lr_0 = 1.0856e-04
Validation mae = 0.391834
Epoch 29
Loss = 2.6161e-02, PNorm = 71.1759, GNorm = 0.4387, lr_0 = 1.0849e-04
Loss = 2.6550e-02, PNorm = 71.1772, GNorm = 0.4504, lr_0 = 1.0841e-04
Loss = 2.5217e-02, PNorm = 71.1782, GNorm = 0.4236, lr_0 = 1.0834e-04
Loss = 2.4017e-02, PNorm = 71.1795, GNorm = 0.3606, lr_0 = 1.0827e-04
Loss = 2.6916e-02, PNorm = 71.1810, GNorm = 0.4970, lr_0 = 1.0819e-04
Loss = 2.9225e-02, PNorm = 71.1828, GNorm = 0.5668, lr_0 = 1.0812e-04
Loss = 2.9935e-02, PNorm = 71.1846, GNorm = 0.5012, lr_0 = 1.0804e-04
Loss = 2.6471e-02, PNorm = 71.1861, GNorm = 0.3060, lr_0 = 1.0797e-04
Loss = 2.9069e-02, PNorm = 71.1876, GNorm = 0.6388, lr_0 = 1.0790e-04
Loss = 2.7782e-02, PNorm = 71.1899, GNorm = 0.4936, lr_0 = 1.0782e-04
Loss = 2.8153e-02, PNorm = 71.1915, GNorm = 0.3627, lr_0 = 1.0775e-04
Loss = 2.7327e-02, PNorm = 71.1921, GNorm = 0.3449, lr_0 = 1.0767e-04
Loss = 2.9537e-02, PNorm = 71.1939, GNorm = 0.4382, lr_0 = 1.0760e-04
Loss = 2.6118e-02, PNorm = 71.1954, GNorm = 0.4838, lr_0 = 1.0753e-04
Loss = 3.6097e-02, PNorm = 71.1962, GNorm = 0.5023, lr_0 = 1.0745e-04
Loss = 2.9115e-02, PNorm = 71.1980, GNorm = 0.4491, lr_0 = 1.0738e-04
Loss = 2.8820e-02, PNorm = 71.1998, GNorm = 0.4321, lr_0 = 1.0731e-04
Loss = 3.0224e-02, PNorm = 71.2020, GNorm = 0.3335, lr_0 = 1.0723e-04
Loss = 3.2097e-02, PNorm = 71.2045, GNorm = 0.6925, lr_0 = 1.0716e-04
Loss = 2.9654e-02, PNorm = 71.2073, GNorm = 0.4064, lr_0 = 1.0709e-04
Loss = 2.7672e-02, PNorm = 71.2093, GNorm = 0.4755, lr_0 = 1.0701e-04
Loss = 2.6867e-02, PNorm = 71.2119, GNorm = 0.6729, lr_0 = 1.0694e-04
Loss = 2.6150e-02, PNorm = 71.2126, GNorm = 0.5159, lr_0 = 1.0687e-04
Loss = 2.9145e-02, PNorm = 71.2137, GNorm = 0.4837, lr_0 = 1.0679e-04
Loss = 2.9023e-02, PNorm = 71.2152, GNorm = 0.4488, lr_0 = 1.0672e-04
Loss = 2.9730e-02, PNorm = 71.2166, GNorm = 0.6026, lr_0 = 1.0665e-04
Loss = 2.8592e-02, PNorm = 71.2180, GNorm = 0.5316, lr_0 = 1.0657e-04
Loss = 3.2396e-02, PNorm = 71.2196, GNorm = 0.3761, lr_0 = 1.0650e-04
Loss = 3.1730e-02, PNorm = 71.2206, GNorm = 0.4317, lr_0 = 1.0643e-04
Loss = 3.8638e-02, PNorm = 71.2220, GNorm = 0.5705, lr_0 = 1.0635e-04
Loss = 2.8648e-02, PNorm = 71.2230, GNorm = 0.3664, lr_0 = 1.0628e-04
Loss = 3.1314e-02, PNorm = 71.2235, GNorm = 0.4784, lr_0 = 1.0621e-04
Loss = 3.0030e-02, PNorm = 71.2249, GNorm = 0.7027, lr_0 = 1.0614e-04
Loss = 2.9608e-02, PNorm = 71.2264, GNorm = 0.4950, lr_0 = 1.0606e-04
Loss = 2.6594e-02, PNorm = 71.2275, GNorm = 0.3976, lr_0 = 1.0599e-04
Loss = 3.5464e-02, PNorm = 71.2278, GNorm = 0.5496, lr_0 = 1.0592e-04
Loss = 2.8997e-02, PNorm = 71.2291, GNorm = 0.5182, lr_0 = 1.0585e-04
Loss = 3.1064e-02, PNorm = 71.2313, GNorm = 0.5204, lr_0 = 1.0577e-04
Loss = 2.4339e-02, PNorm = 71.2330, GNorm = 0.5122, lr_0 = 1.0570e-04
Loss = 2.8419e-02, PNorm = 71.2342, GNorm = 0.4289, lr_0 = 1.0563e-04
Loss = 2.6434e-02, PNorm = 71.2343, GNorm = 0.3405, lr_0 = 1.0556e-04
Loss = 2.5403e-02, PNorm = 71.2354, GNorm = 0.5350, lr_0 = 1.0548e-04
Loss = 2.8053e-02, PNorm = 71.2362, GNorm = 0.4731, lr_0 = 1.0541e-04
Loss = 3.0898e-02, PNorm = 71.2377, GNorm = 0.5357, lr_0 = 1.0534e-04
Loss = 3.2492e-02, PNorm = 71.2388, GNorm = 0.4885, lr_0 = 1.0527e-04
Loss = 3.1935e-02, PNorm = 71.2396, GNorm = 0.4868, lr_0 = 1.0519e-04
Loss = 3.0775e-02, PNorm = 71.2412, GNorm = 0.7145, lr_0 = 1.0512e-04
Loss = 2.9759e-02, PNorm = 71.2441, GNorm = 0.4885, lr_0 = 1.0505e-04
Loss = 2.7568e-02, PNorm = 71.2452, GNorm = 0.3631, lr_0 = 1.0498e-04
Loss = 2.9790e-02, PNorm = 71.2460, GNorm = 0.4786, lr_0 = 1.0491e-04
Loss = 3.5431e-02, PNorm = 71.2472, GNorm = 0.5015, lr_0 = 1.0483e-04
Loss = 2.7689e-02, PNorm = 71.2486, GNorm = 0.3571, lr_0 = 1.0476e-04
Loss = 3.2047e-02, PNorm = 71.2494, GNorm = 0.4824, lr_0 = 1.0469e-04
Loss = 3.0401e-02, PNorm = 71.2507, GNorm = 0.6203, lr_0 = 1.0462e-04
Loss = 2.6506e-02, PNorm = 71.2526, GNorm = 0.5557, lr_0 = 1.0455e-04
Loss = 2.6259e-02, PNorm = 71.2544, GNorm = 0.4377, lr_0 = 1.0448e-04
Loss = 2.7932e-02, PNorm = 71.2563, GNorm = 0.4755, lr_0 = 1.0440e-04
Loss = 2.6924e-02, PNorm = 71.2576, GNorm = 0.3337, lr_0 = 1.0433e-04
Loss = 2.9272e-02, PNorm = 71.2588, GNorm = 0.4585, lr_0 = 1.0426e-04
Loss = 2.8960e-02, PNorm = 71.2594, GNorm = 0.4552, lr_0 = 1.0419e-04
Loss = 2.6465e-02, PNorm = 71.2606, GNorm = 0.4840, lr_0 = 1.0412e-04
Loss = 3.6435e-02, PNorm = 71.2620, GNorm = 0.4549, lr_0 = 1.0405e-04
Loss = 2.9706e-02, PNorm = 71.2638, GNorm = 0.4028, lr_0 = 1.0398e-04
Loss = 3.2777e-02, PNorm = 71.2655, GNorm = 0.4505, lr_0 = 1.0391e-04
Loss = 3.1655e-02, PNorm = 71.2677, GNorm = 0.4166, lr_0 = 1.0383e-04
Loss = 3.0929e-02, PNorm = 71.2690, GNorm = 0.4434, lr_0 = 1.0376e-04
Loss = 3.0123e-02, PNorm = 71.2707, GNorm = 0.4094, lr_0 = 1.0369e-04
Loss = 3.4043e-02, PNorm = 71.2721, GNorm = 0.5156, lr_0 = 1.0362e-04
Loss = 2.6064e-02, PNorm = 71.2732, GNorm = 0.4835, lr_0 = 1.0355e-04
Loss = 3.1075e-02, PNorm = 71.2741, GNorm = 0.4058, lr_0 = 1.0348e-04
Loss = 3.0944e-02, PNorm = 71.2758, GNorm = 0.4361, lr_0 = 1.0341e-04
Loss = 3.3555e-02, PNorm = 71.2767, GNorm = 0.6371, lr_0 = 1.0334e-04
Loss = 3.0407e-02, PNorm = 71.2780, GNorm = 0.5319, lr_0 = 1.0327e-04
Loss = 3.2235e-02, PNorm = 71.2799, GNorm = 0.3922, lr_0 = 1.0320e-04
Loss = 3.2241e-02, PNorm = 71.2817, GNorm = 0.3737, lr_0 = 1.0312e-04
Loss = 3.1277e-02, PNorm = 71.2829, GNorm = 0.4977, lr_0 = 1.0305e-04
Loss = 3.0306e-02, PNorm = 71.2838, GNorm = 0.6745, lr_0 = 1.0298e-04
Loss = 2.8435e-02, PNorm = 71.2854, GNorm = 0.4139, lr_0 = 1.0291e-04
Loss = 2.9632e-02, PNorm = 71.2866, GNorm = 0.4239, lr_0 = 1.0284e-04
Loss = 2.7210e-02, PNorm = 71.2875, GNorm = 0.3814, lr_0 = 1.0277e-04
Loss = 3.2264e-02, PNorm = 71.2875, GNorm = 0.6781, lr_0 = 1.0270e-04
Loss = 3.2256e-02, PNorm = 71.2885, GNorm = 0.5089, lr_0 = 1.0263e-04
Loss = 3.0441e-02, PNorm = 71.2898, GNorm = 0.4477, lr_0 = 1.0256e-04
Loss = 3.4802e-02, PNorm = 71.2912, GNorm = 0.4914, lr_0 = 1.0249e-04
Loss = 3.0869e-02, PNorm = 71.2933, GNorm = 0.4418, lr_0 = 1.0242e-04
Loss = 2.8168e-02, PNorm = 71.2950, GNorm = 0.6015, lr_0 = 1.0235e-04
Loss = 3.9249e-02, PNorm = 71.2965, GNorm = 0.6154, lr_0 = 1.0228e-04
Loss = 2.7896e-02, PNorm = 71.2983, GNorm = 0.3961, lr_0 = 1.0221e-04
Loss = 3.1667e-02, PNorm = 71.2993, GNorm = 0.4727, lr_0 = 1.0214e-04
Loss = 2.9763e-02, PNorm = 71.3001, GNorm = 0.5503, lr_0 = 1.0207e-04
Loss = 2.6542e-02, PNorm = 71.3018, GNorm = 0.4609, lr_0 = 1.0200e-04
Loss = 2.9968e-02, PNorm = 71.3033, GNorm = 0.4153, lr_0 = 1.0193e-04
Loss = 3.4339e-02, PNorm = 71.3031, GNorm = 0.6991, lr_0 = 1.0186e-04
Loss = 3.0350e-02, PNorm = 71.3043, GNorm = 0.4618, lr_0 = 1.0179e-04
Loss = 3.0301e-02, PNorm = 71.3060, GNorm = 0.6861, lr_0 = 1.0172e-04
Loss = 2.8785e-02, PNorm = 71.3069, GNorm = 0.4854, lr_0 = 1.0165e-04
Loss = 3.3869e-02, PNorm = 71.3086, GNorm = 0.4148, lr_0 = 1.0158e-04
Loss = 3.3856e-02, PNorm = 71.3095, GNorm = 0.5877, lr_0 = 1.0151e-04
Loss = 3.1043e-02, PNorm = 71.3098, GNorm = 0.7206, lr_0 = 1.0144e-04
Loss = 3.4301e-02, PNorm = 71.3106, GNorm = 0.5324, lr_0 = 1.0137e-04
Loss = 2.9421e-02, PNorm = 71.3111, GNorm = 0.3688, lr_0 = 1.0130e-04
Loss = 2.6403e-02, PNorm = 71.3126, GNorm = 0.3978, lr_0 = 1.0123e-04
Loss = 3.3633e-02, PNorm = 71.3146, GNorm = 0.3833, lr_0 = 1.0116e-04
Loss = 2.9588e-02, PNorm = 71.3157, GNorm = 0.3852, lr_0 = 1.0110e-04
Loss = 3.0243e-02, PNorm = 71.3166, GNorm = 0.3644, lr_0 = 1.0103e-04
Loss = 3.1741e-02, PNorm = 71.3168, GNorm = 0.6008, lr_0 = 1.0096e-04
Loss = 3.3409e-02, PNorm = 71.3179, GNorm = 0.4423, lr_0 = 1.0089e-04
Loss = 2.7377e-02, PNorm = 71.3193, GNorm = 0.3694, lr_0 = 1.0082e-04
Loss = 3.5896e-02, PNorm = 71.3199, GNorm = 0.5188, lr_0 = 1.0075e-04
Loss = 3.2418e-02, PNorm = 71.3206, GNorm = 0.7263, lr_0 = 1.0068e-04
Loss = 3.5709e-02, PNorm = 71.3219, GNorm = 0.4074, lr_0 = 1.0061e-04
Loss = 2.9781e-02, PNorm = 71.3229, GNorm = 0.4227, lr_0 = 1.0054e-04
Loss = 2.9709e-02, PNorm = 71.3243, GNorm = 0.3777, lr_0 = 1.0047e-04
Loss = 4.0906e-02, PNorm = 71.3256, GNorm = 0.4958, lr_0 = 1.0041e-04
Loss = 3.2139e-02, PNorm = 71.3275, GNorm = 0.7988, lr_0 = 1.0034e-04
Loss = 3.1199e-02, PNorm = 71.3285, GNorm = 0.5360, lr_0 = 1.0027e-04
Loss = 2.8835e-02, PNorm = 71.3296, GNorm = 0.4136, lr_0 = 1.0020e-04
Loss = 3.3134e-02, PNorm = 71.3308, GNorm = 0.5931, lr_0 = 1.0013e-04
Loss = 3.1200e-02, PNorm = 71.3314, GNorm = 0.3207, lr_0 = 1.0006e-04
Loss = 3.1590e-02, PNorm = 71.3324, GNorm = 0.5040, lr_0 = 1.0000e-04
Validation mae = 0.396204
Model 0 best validation mae = 0.384121 on epoch 20
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.380772
Ensemble test mae = 0.380772
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 1.0310e+00, PNorm = 38.3704, GNorm = 3.6744, lr_0 = 1.0413e-04
Loss = 8.8324e-01, PNorm = 38.3729, GNorm = 2.3329, lr_0 = 1.0788e-04
Loss = 7.7182e-01, PNorm = 38.3766, GNorm = 1.0406, lr_0 = 1.1163e-04
Loss = 9.2425e-01, PNorm = 38.3789, GNorm = 4.4856, lr_0 = 1.1537e-04
Loss = 7.5949e-01, PNorm = 38.3812, GNorm = 4.1867, lr_0 = 1.1913e-04
Loss = 7.4514e-01, PNorm = 38.3861, GNorm = 1.4836, lr_0 = 1.2287e-04
Loss = 6.1691e-01, PNorm = 38.3916, GNorm = 7.8525, lr_0 = 1.2663e-04
Loss = 6.4459e-01, PNorm = 38.3967, GNorm = 7.3141, lr_0 = 1.3038e-04
Loss = 6.0884e-01, PNorm = 38.4024, GNorm = 3.7352, lr_0 = 1.3413e-04
Loss = 4.9960e-01, PNorm = 38.4083, GNorm = 4.7804, lr_0 = 1.3788e-04
Loss = 6.1051e-01, PNorm = 38.4142, GNorm = 24.6265, lr_0 = 1.4163e-04
Loss = 5.2493e-01, PNorm = 38.4188, GNorm = 15.3046, lr_0 = 1.4537e-04
Loss = 5.5347e-01, PNorm = 38.4239, GNorm = 7.3995, lr_0 = 1.4913e-04
Loss = 5.2428e-01, PNorm = 38.4289, GNorm = 12.7870, lr_0 = 1.5288e-04
Loss = 4.6732e-01, PNorm = 38.4355, GNorm = 3.4853, lr_0 = 1.5662e-04
Loss = 4.8795e-01, PNorm = 38.4418, GNorm = 8.2837, lr_0 = 1.6038e-04
Loss = 4.1962e-01, PNorm = 38.4470, GNorm = 8.1929, lr_0 = 1.6412e-04
Loss = 4.5108e-01, PNorm = 38.4537, GNorm = 26.5659, lr_0 = 1.6788e-04
Loss = 4.5533e-01, PNorm = 38.4594, GNorm = 35.4023, lr_0 = 1.7163e-04
Loss = 5.1868e-01, PNorm = 38.4668, GNorm = 16.7350, lr_0 = 1.7538e-04
Loss = 4.3087e-01, PNorm = 38.4729, GNorm = 19.1154, lr_0 = 1.7913e-04
Loss = 4.3143e-01, PNorm = 38.4794, GNorm = 21.3095, lr_0 = 1.8288e-04
Loss = 3.8614e-01, PNorm = 38.4860, GNorm = 3.2860, lr_0 = 1.8662e-04
Loss = 4.1389e-01, PNorm = 38.4919, GNorm = 12.4776, lr_0 = 1.9038e-04
Loss = 4.1531e-01, PNorm = 38.4979, GNorm = 6.1335, lr_0 = 1.9413e-04
Loss = 4.2179e-01, PNorm = 38.5037, GNorm = 17.7990, lr_0 = 1.9788e-04
Loss = 4.3819e-01, PNorm = 38.5090, GNorm = 8.3605, lr_0 = 2.0163e-04
Loss = 4.1066e-01, PNorm = 38.5150, GNorm = 4.7197, lr_0 = 2.0537e-04
Loss = 4.2922e-01, PNorm = 38.5195, GNorm = 7.3883, lr_0 = 2.0913e-04
Loss = 3.5361e-01, PNorm = 38.5251, GNorm = 6.6401, lr_0 = 2.1288e-04
Loss = 3.9626e-01, PNorm = 38.5309, GNorm = 8.2988, lr_0 = 2.1663e-04
Loss = 3.6463e-01, PNorm = 38.5375, GNorm = 7.1142, lr_0 = 2.2038e-04
Loss = 3.7903e-01, PNorm = 38.5436, GNorm = 3.3834, lr_0 = 2.2412e-04
Loss = 4.1657e-01, PNorm = 38.5482, GNorm = 15.2911, lr_0 = 2.2787e-04
Loss = 4.4094e-01, PNorm = 38.5560, GNorm = 22.2839, lr_0 = 2.3163e-04
Loss = 4.3808e-01, PNorm = 38.5598, GNorm = 11.2648, lr_0 = 2.3538e-04
Loss = 3.3553e-01, PNorm = 38.5652, GNorm = 4.6452, lr_0 = 2.3913e-04
Loss = 3.3655e-01, PNorm = 38.5717, GNorm = 1.7322, lr_0 = 2.4288e-04
Loss = 3.8927e-01, PNorm = 38.5798, GNorm = 5.7631, lr_0 = 2.4662e-04
Loss = 4.1004e-01, PNorm = 38.5853, GNorm = 12.9223, lr_0 = 2.5038e-04
Loss = 3.8849e-01, PNorm = 38.5909, GNorm = 13.5796, lr_0 = 2.5413e-04
Loss = 3.3754e-01, PNorm = 38.5978, GNorm = 7.1502, lr_0 = 2.5788e-04
Loss = 3.2801e-01, PNorm = 38.6063, GNorm = 12.3965, lr_0 = 2.6163e-04
Loss = 3.1508e-01, PNorm = 38.6143, GNorm = 2.7371, lr_0 = 2.6537e-04
Loss = 2.8257e-01, PNorm = 38.6214, GNorm = 10.8030, lr_0 = 2.6912e-04
Loss = 3.8512e-01, PNorm = 38.6277, GNorm = 14.2657, lr_0 = 2.7288e-04
Loss = 3.2790e-01, PNorm = 38.6330, GNorm = 3.3637, lr_0 = 2.7663e-04
Loss = 3.4672e-01, PNorm = 38.6395, GNorm = 7.8670, lr_0 = 2.8038e-04
Loss = 3.3725e-01, PNorm = 38.6476, GNorm = 2.6310, lr_0 = 2.8413e-04
Loss = 2.9992e-01, PNorm = 38.6533, GNorm = 3.5171, lr_0 = 2.8787e-04
Loss = 3.2090e-01, PNorm = 38.6578, GNorm = 1.6118, lr_0 = 2.9163e-04
Loss = 2.8361e-01, PNorm = 38.6652, GNorm = 4.3443, lr_0 = 2.9538e-04
Loss = 3.4942e-01, PNorm = 38.6706, GNorm = 9.8822, lr_0 = 2.9913e-04
Loss = 4.2064e-01, PNorm = 38.6766, GNorm = 3.1851, lr_0 = 3.0288e-04
Loss = 3.8238e-01, PNorm = 38.6841, GNorm = 19.6140, lr_0 = 3.0662e-04
Loss = 4.1991e-01, PNorm = 38.6930, GNorm = 10.1226, lr_0 = 3.1037e-04
Loss = 4.5341e-01, PNorm = 38.6991, GNorm = 10.8989, lr_0 = 3.1413e-04
Loss = 3.8497e-01, PNorm = 38.7059, GNorm = 4.5561, lr_0 = 3.1788e-04
Loss = 3.4267e-01, PNorm = 38.7151, GNorm = 6.8375, lr_0 = 3.2163e-04
Loss = 3.8350e-01, PNorm = 38.7246, GNorm = 11.2229, lr_0 = 3.2538e-04
Loss = 3.4358e-01, PNorm = 38.7351, GNorm = 1.6922, lr_0 = 3.2912e-04
Loss = 3.4197e-01, PNorm = 38.7473, GNorm = 4.9569, lr_0 = 3.3288e-04
Loss = 2.8899e-01, PNorm = 38.7582, GNorm = 13.2048, lr_0 = 3.3663e-04
Loss = 3.4177e-01, PNorm = 38.7611, GNorm = 1.9570, lr_0 = 3.4038e-04
Loss = 3.0938e-01, PNorm = 38.7662, GNorm = 1.4287, lr_0 = 3.4413e-04
Loss = 3.5239e-01, PNorm = 38.7780, GNorm = 3.1927, lr_0 = 3.4787e-04
Loss = 2.9684e-01, PNorm = 38.7872, GNorm = 4.9931, lr_0 = 3.5162e-04
Loss = 3.1796e-01, PNorm = 38.7961, GNorm = 13.6774, lr_0 = 3.5538e-04
Loss = 3.1202e-01, PNorm = 38.8039, GNorm = 9.6716, lr_0 = 3.5913e-04
Loss = 3.1011e-01, PNorm = 38.8115, GNorm = 11.3716, lr_0 = 3.6288e-04
Loss = 2.8439e-01, PNorm = 38.8177, GNorm = 2.3208, lr_0 = 3.6662e-04
Loss = 2.6822e-01, PNorm = 38.8266, GNorm = 8.0862, lr_0 = 3.7037e-04
Loss = 2.6321e-01, PNorm = 38.8337, GNorm = 5.4116, lr_0 = 3.7413e-04
Loss = 3.3340e-01, PNorm = 38.8398, GNorm = 6.5572, lr_0 = 3.7788e-04
Loss = 2.5687e-01, PNorm = 38.8455, GNorm = 9.9685, lr_0 = 3.8163e-04
Loss = 3.0530e-01, PNorm = 38.8531, GNorm = 2.4308, lr_0 = 3.8537e-04
Loss = 2.7584e-01, PNorm = 38.8638, GNorm = 6.4429, lr_0 = 3.8912e-04
Loss = 2.6257e-01, PNorm = 38.8682, GNorm = 5.2730, lr_0 = 3.9287e-04
Loss = 3.0232e-01, PNorm = 38.8737, GNorm = 7.4996, lr_0 = 3.9663e-04
Loss = 2.7076e-01, PNorm = 38.8850, GNorm = 3.1855, lr_0 = 4.0038e-04
Loss = 2.9138e-01, PNorm = 38.8982, GNorm = 2.0445, lr_0 = 4.0413e-04
Loss = 2.8872e-01, PNorm = 38.9050, GNorm = 11.9156, lr_0 = 4.0787e-04
Loss = 2.8171e-01, PNorm = 38.9148, GNorm = 8.8257, lr_0 = 4.1162e-04
Loss = 2.6627e-01, PNorm = 38.9238, GNorm = 4.4419, lr_0 = 4.1537e-04
Loss = 2.9428e-01, PNorm = 38.9326, GNorm = 7.0378, lr_0 = 4.1913e-04
Loss = 3.1720e-01, PNorm = 38.9478, GNorm = 3.2069, lr_0 = 4.2288e-04
Loss = 3.2274e-01, PNorm = 38.9541, GNorm = 8.3893, lr_0 = 4.2662e-04
Loss = 3.4399e-01, PNorm = 38.9649, GNorm = 4.2737, lr_0 = 4.3037e-04
Loss = 2.7069e-01, PNorm = 38.9765, GNorm = 2.0369, lr_0 = 4.3412e-04
Loss = 2.7648e-01, PNorm = 38.9921, GNorm = 1.2640, lr_0 = 4.3788e-04
Loss = 3.0151e-01, PNorm = 39.0013, GNorm = 7.5516, lr_0 = 4.4163e-04
Loss = 3.3149e-01, PNorm = 39.0101, GNorm = 4.2845, lr_0 = 4.4538e-04
Loss = 2.7749e-01, PNorm = 39.0248, GNorm = 8.6298, lr_0 = 4.4912e-04
Loss = 2.8142e-01, PNorm = 39.0358, GNorm = 3.3991, lr_0 = 4.5287e-04
Loss = 2.7670e-01, PNorm = 39.0448, GNorm = 2.4838, lr_0 = 4.5662e-04
Loss = 2.7967e-01, PNorm = 39.0543, GNorm = 5.8335, lr_0 = 4.6038e-04
Loss = 2.9943e-01, PNorm = 39.0643, GNorm = 19.7226, lr_0 = 4.6413e-04
Loss = 3.2324e-01, PNorm = 39.0717, GNorm = 3.0314, lr_0 = 4.6787e-04
Loss = 3.2186e-01, PNorm = 39.0800, GNorm = 2.2916, lr_0 = 4.7162e-04
Loss = 2.7785e-01, PNorm = 39.0890, GNorm = 1.5451, lr_0 = 4.7537e-04
Loss = 3.1682e-01, PNorm = 39.1008, GNorm = 3.2713, lr_0 = 4.7913e-04
Loss = 2.5320e-01, PNorm = 39.1188, GNorm = 5.3319, lr_0 = 4.8288e-04
Loss = 2.2327e-01, PNorm = 39.1329, GNorm = 4.7872, lr_0 = 4.8663e-04
Loss = 2.9134e-01, PNorm = 39.1360, GNorm = 4.1867, lr_0 = 4.9038e-04
Loss = 3.2867e-01, PNorm = 39.1484, GNorm = 10.1540, lr_0 = 4.9412e-04
Loss = 3.5404e-01, PNorm = 39.1634, GNorm = 12.5299, lr_0 = 4.9788e-04
Loss = 3.2581e-01, PNorm = 39.1827, GNorm = 12.2888, lr_0 = 5.0163e-04
Loss = 3.1075e-01, PNorm = 39.2050, GNorm = 8.8636, lr_0 = 5.0538e-04
Loss = 2.4625e-01, PNorm = 39.2182, GNorm = 2.3530, lr_0 = 5.0913e-04
Loss = 2.3524e-01, PNorm = 39.2305, GNorm = 2.4914, lr_0 = 5.1287e-04
Loss = 2.6816e-01, PNorm = 39.2406, GNorm = 4.4212, lr_0 = 5.1663e-04
Loss = 2.2871e-01, PNorm = 39.2477, GNorm = 1.1066, lr_0 = 5.2038e-04
Loss = 2.7843e-01, PNorm = 39.2623, GNorm = 8.7108, lr_0 = 5.2413e-04
Loss = 3.4997e-01, PNorm = 39.2745, GNorm = 10.4671, lr_0 = 5.2788e-04
Loss = 2.8950e-01, PNorm = 39.2896, GNorm = 1.9114, lr_0 = 5.3162e-04
Loss = 2.6662e-01, PNorm = 39.3044, GNorm = 2.3177, lr_0 = 5.3538e-04
Loss = 3.0714e-01, PNorm = 39.3162, GNorm = 13.0133, lr_0 = 5.3912e-04
Loss = 2.7062e-01, PNorm = 39.3313, GNorm = 13.0169, lr_0 = 5.4288e-04
Loss = 2.9193e-01, PNorm = 39.3428, GNorm = 8.1746, lr_0 = 5.4663e-04
Loss = 3.1285e-01, PNorm = 39.3540, GNorm = 5.6628, lr_0 = 5.5038e-04
Validation mae = 0.592116
Epoch 1
Loss = 2.5772e-01, PNorm = 39.3713, GNorm = 7.9950, lr_0 = 5.5413e-04
Loss = 2.5056e-01, PNorm = 39.3884, GNorm = 0.7109, lr_0 = 5.5787e-04
Loss = 2.6554e-01, PNorm = 39.4006, GNorm = 1.2598, lr_0 = 5.6163e-04
Loss = 2.6556e-01, PNorm = 39.4180, GNorm = 3.7822, lr_0 = 5.6538e-04
Loss = 2.5325e-01, PNorm = 39.4311, GNorm = 2.7099, lr_0 = 5.6913e-04
Loss = 2.5165e-01, PNorm = 39.4441, GNorm = 5.7686, lr_0 = 5.7288e-04
Loss = 2.4833e-01, PNorm = 39.4578, GNorm = 5.0294, lr_0 = 5.7662e-04
Loss = 2.3032e-01, PNorm = 39.4743, GNorm = 4.3533, lr_0 = 5.8038e-04
Loss = 2.4297e-01, PNorm = 39.4907, GNorm = 8.6201, lr_0 = 5.8413e-04
Loss = 2.6256e-01, PNorm = 39.5001, GNorm = 1.9786, lr_0 = 5.8788e-04
Loss = 2.5719e-01, PNorm = 39.5173, GNorm = 11.8581, lr_0 = 5.9163e-04
Loss = 2.4532e-01, PNorm = 39.5367, GNorm = 7.3915, lr_0 = 5.9538e-04
Loss = 2.6550e-01, PNorm = 39.5497, GNorm = 4.3170, lr_0 = 5.9913e-04
Loss = 2.4734e-01, PNorm = 39.5586, GNorm = 1.5159, lr_0 = 6.0288e-04
Loss = 3.6144e-01, PNorm = 39.5706, GNorm = 13.4165, lr_0 = 6.0663e-04
Loss = 3.2349e-01, PNorm = 39.5833, GNorm = 10.6102, lr_0 = 6.1038e-04
Loss = 3.6069e-01, PNorm = 39.6036, GNorm = 3.2416, lr_0 = 6.1413e-04
Loss = 3.1457e-01, PNorm = 39.6235, GNorm = 7.0013, lr_0 = 6.1788e-04
Loss = 2.9820e-01, PNorm = 39.6440, GNorm = 4.1098, lr_0 = 6.2163e-04
Loss = 2.3870e-01, PNorm = 39.6659, GNorm = 3.3162, lr_0 = 6.2538e-04
Loss = 2.4899e-01, PNorm = 39.6815, GNorm = 8.2090, lr_0 = 6.2913e-04
Loss = 2.2167e-01, PNorm = 39.6960, GNorm = 2.7360, lr_0 = 6.3288e-04
Loss = 2.8860e-01, PNorm = 39.7128, GNorm = 6.6557, lr_0 = 6.3663e-04
Loss = 2.4989e-01, PNorm = 39.7278, GNorm = 6.6127, lr_0 = 6.4038e-04
Loss = 2.5551e-01, PNorm = 39.7389, GNorm = 3.6069, lr_0 = 6.4413e-04
Loss = 2.7213e-01, PNorm = 39.7510, GNorm = 1.2872, lr_0 = 6.4788e-04
Loss = 2.5908e-01, PNorm = 39.7654, GNorm = 5.3279, lr_0 = 6.5163e-04
Loss = 2.4448e-01, PNorm = 39.7771, GNorm = 1.9698, lr_0 = 6.5538e-04
Loss = 2.4752e-01, PNorm = 39.7925, GNorm = 3.3905, lr_0 = 6.5913e-04
Loss = 2.6342e-01, PNorm = 39.8112, GNorm = 6.4408, lr_0 = 6.6288e-04
Loss = 3.0060e-01, PNorm = 39.8297, GNorm = 7.4123, lr_0 = 6.6663e-04
Loss = 2.6063e-01, PNorm = 39.8501, GNorm = 6.9927, lr_0 = 6.7038e-04
Loss = 2.6764e-01, PNorm = 39.8742, GNorm = 6.9659, lr_0 = 6.7413e-04
Loss = 2.6168e-01, PNorm = 39.8892, GNorm = 6.2307, lr_0 = 6.7788e-04
Loss = 2.2819e-01, PNorm = 39.9004, GNorm = 2.8579, lr_0 = 6.8163e-04
Loss = 2.6542e-01, PNorm = 39.9207, GNorm = 2.2682, lr_0 = 6.8538e-04
Loss = 2.3875e-01, PNorm = 39.9404, GNorm = 4.4950, lr_0 = 6.8913e-04
Loss = 2.5050e-01, PNorm = 39.9527, GNorm = 7.5954, lr_0 = 6.9288e-04
Loss = 2.4694e-01, PNorm = 39.9712, GNorm = 4.1392, lr_0 = 6.9663e-04
Loss = 2.4330e-01, PNorm = 39.9868, GNorm = 1.3277, lr_0 = 7.0038e-04
Loss = 2.2159e-01, PNorm = 39.9960, GNorm = 2.2993, lr_0 = 7.0413e-04
Loss = 2.5253e-01, PNorm = 40.0107, GNorm = 1.4042, lr_0 = 7.0788e-04
Loss = 2.4842e-01, PNorm = 40.0321, GNorm = 7.0662, lr_0 = 7.1163e-04
Loss = 3.3101e-01, PNorm = 40.0458, GNorm = 1.7988, lr_0 = 7.1538e-04
Loss = 2.7695e-01, PNorm = 40.0696, GNorm = 3.6394, lr_0 = 7.1913e-04
Loss = 2.4459e-01, PNorm = 40.0979, GNorm = 2.5347, lr_0 = 7.2288e-04
Loss = 2.4285e-01, PNorm = 40.1232, GNorm = 2.6145, lr_0 = 7.2663e-04
Loss = 2.9593e-01, PNorm = 40.1401, GNorm = 1.9206, lr_0 = 7.3038e-04
Loss = 2.7223e-01, PNorm = 40.1605, GNorm = 3.9053, lr_0 = 7.3413e-04
Loss = 2.5281e-01, PNorm = 40.1865, GNorm = 3.2826, lr_0 = 7.3788e-04
Loss = 2.6995e-01, PNorm = 40.2071, GNorm = 6.6780, lr_0 = 7.4163e-04
Loss = 2.9203e-01, PNorm = 40.2325, GNorm = 2.5883, lr_0 = 7.4538e-04
Loss = 2.8138e-01, PNorm = 40.2537, GNorm = 4.6305, lr_0 = 7.4913e-04
Loss = 2.3858e-01, PNorm = 40.2766, GNorm = 1.2972, lr_0 = 7.5288e-04
Loss = 2.6856e-01, PNorm = 40.2984, GNorm = 1.8301, lr_0 = 7.5663e-04
Loss = 2.5614e-01, PNorm = 40.3228, GNorm = 5.2554, lr_0 = 7.6038e-04
Loss = 2.4260e-01, PNorm = 40.3483, GNorm = 3.1446, lr_0 = 7.6413e-04
Loss = 2.4304e-01, PNorm = 40.3743, GNorm = 3.6412, lr_0 = 7.6788e-04
Loss = 2.4798e-01, PNorm = 40.3873, GNorm = 1.8981, lr_0 = 7.7163e-04
Loss = 2.2373e-01, PNorm = 40.4027, GNorm = 1.3310, lr_0 = 7.7538e-04
Loss = 2.0513e-01, PNorm = 40.4238, GNorm = 1.7177, lr_0 = 7.7913e-04
Loss = 2.4920e-01, PNorm = 40.4411, GNorm = 1.6936, lr_0 = 7.8288e-04
Loss = 2.7446e-01, PNorm = 40.4549, GNorm = 4.2675, lr_0 = 7.8663e-04
Loss = 2.6703e-01, PNorm = 40.4770, GNorm = 5.1607, lr_0 = 7.9038e-04
Loss = 2.8042e-01, PNorm = 40.5058, GNorm = 2.1531, lr_0 = 7.9413e-04
Loss = 2.6599e-01, PNorm = 40.5345, GNorm = 7.4581, lr_0 = 7.9788e-04
Loss = 2.2212e-01, PNorm = 40.5665, GNorm = 1.8463, lr_0 = 8.0163e-04
Loss = 2.1363e-01, PNorm = 40.5993, GNorm = 2.4318, lr_0 = 8.0538e-04
Loss = 2.2023e-01, PNorm = 40.6171, GNorm = 2.0549, lr_0 = 8.0913e-04
Loss = 2.2588e-01, PNorm = 40.6458, GNorm = 2.6331, lr_0 = 8.1288e-04
Loss = 2.1008e-01, PNorm = 40.6644, GNorm = 1.6505, lr_0 = 8.1663e-04
Loss = 2.4599e-01, PNorm = 40.6773, GNorm = 1.2521, lr_0 = 8.2038e-04
Loss = 2.4422e-01, PNorm = 40.6947, GNorm = 1.3050, lr_0 = 8.2413e-04
Loss = 2.1605e-01, PNorm = 40.7265, GNorm = 9.2451, lr_0 = 8.2788e-04
Loss = 2.5253e-01, PNorm = 40.7461, GNorm = 5.9181, lr_0 = 8.3163e-04
Loss = 2.5619e-01, PNorm = 40.7747, GNorm = 1.8720, lr_0 = 8.3538e-04
Loss = 1.9510e-01, PNorm = 40.8051, GNorm = 2.6440, lr_0 = 8.3913e-04
Loss = 2.5646e-01, PNorm = 40.8327, GNorm = 7.6801, lr_0 = 8.4288e-04
Loss = 2.1429e-01, PNorm = 40.8608, GNorm = 5.2810, lr_0 = 8.4663e-04
Loss = 2.2865e-01, PNorm = 40.8828, GNorm = 3.3947, lr_0 = 8.5038e-04
Loss = 1.9912e-01, PNorm = 40.8977, GNorm = 1.1581, lr_0 = 8.5413e-04
Loss = 2.3490e-01, PNorm = 40.9136, GNorm = 4.5061, lr_0 = 8.5788e-04
Loss = 2.4965e-01, PNorm = 40.9370, GNorm = 2.0615, lr_0 = 8.6163e-04
Loss = 1.9242e-01, PNorm = 40.9637, GNorm = 2.9895, lr_0 = 8.6538e-04
Loss = 2.3901e-01, PNorm = 40.9830, GNorm = 1.6969, lr_0 = 8.6913e-04
Loss = 2.1365e-01, PNorm = 41.0088, GNorm = 2.5993, lr_0 = 8.7288e-04
Loss = 2.1887e-01, PNorm = 41.0331, GNorm = 4.1968, lr_0 = 8.7663e-04
Loss = 3.0854e-01, PNorm = 41.0487, GNorm = 1.2308, lr_0 = 8.8038e-04
Loss = 2.8338e-01, PNorm = 41.0734, GNorm = 6.1945, lr_0 = 8.8413e-04
Loss = 2.7400e-01, PNorm = 41.1028, GNorm = 1.8834, lr_0 = 8.8788e-04
Loss = 2.3019e-01, PNorm = 41.1397, GNorm = 2.5742, lr_0 = 8.9163e-04
Loss = 2.6464e-01, PNorm = 41.1658, GNorm = 6.6908, lr_0 = 8.9538e-04
Loss = 2.8933e-01, PNorm = 41.2029, GNorm = 6.3465, lr_0 = 8.9913e-04
Loss = 2.6077e-01, PNorm = 41.2368, GNorm = 0.9412, lr_0 = 9.0288e-04
Loss = 2.1911e-01, PNorm = 41.2721, GNorm = 1.8143, lr_0 = 9.0663e-04
Loss = 2.1150e-01, PNorm = 41.3011, GNorm = 1.1817, lr_0 = 9.1038e-04
Loss = 2.1755e-01, PNorm = 41.3255, GNorm = 1.1841, lr_0 = 9.1413e-04
Loss = 2.6914e-01, PNorm = 41.3549, GNorm = 2.8903, lr_0 = 9.1788e-04
Loss = 2.2324e-01, PNorm = 41.3826, GNorm = 2.7745, lr_0 = 9.2163e-04
Loss = 2.4002e-01, PNorm = 41.4139, GNorm = 2.5247, lr_0 = 9.2538e-04
Loss = 1.9832e-01, PNorm = 41.4349, GNorm = 2.4000, lr_0 = 9.2913e-04
Loss = 2.0567e-01, PNorm = 41.4592, GNorm = 1.1926, lr_0 = 9.3288e-04
Loss = 2.1303e-01, PNorm = 41.4887, GNorm = 2.8580, lr_0 = 9.3663e-04
Loss = 2.2628e-01, PNorm = 41.5183, GNorm = 4.5820, lr_0 = 9.4038e-04
Loss = 2.3821e-01, PNorm = 41.5485, GNorm = 4.6748, lr_0 = 9.4413e-04
Loss = 2.0977e-01, PNorm = 41.5818, GNorm = 2.4814, lr_0 = 9.4788e-04
Loss = 2.7198e-01, PNorm = 41.6094, GNorm = 4.7671, lr_0 = 9.5163e-04
Loss = 2.0569e-01, PNorm = 41.6355, GNorm = 1.8661, lr_0 = 9.5538e-04
Loss = 2.5194e-01, PNorm = 41.6752, GNorm = 2.2896, lr_0 = 9.5913e-04
Loss = 2.1860e-01, PNorm = 41.7036, GNorm = 4.8706, lr_0 = 9.6288e-04
Loss = 2.4043e-01, PNorm = 41.7243, GNorm = 1.0868, lr_0 = 9.6663e-04
Loss = 2.4879e-01, PNorm = 41.7456, GNorm = 3.1004, lr_0 = 9.7038e-04
Loss = 2.5811e-01, PNorm = 41.7833, GNorm = 1.5884, lr_0 = 9.7413e-04
Loss = 2.9115e-01, PNorm = 41.8175, GNorm = 0.8327, lr_0 = 9.7788e-04
Loss = 2.0694e-01, PNorm = 41.8532, GNorm = 2.5300, lr_0 = 9.8163e-04
Loss = 2.7127e-01, PNorm = 41.8809, GNorm = 2.4417, lr_0 = 9.8537e-04
Loss = 2.3941e-01, PNorm = 41.9260, GNorm = 2.6971, lr_0 = 9.8912e-04
Loss = 2.4415e-01, PNorm = 41.9714, GNorm = 1.4249, lr_0 = 9.9288e-04
Loss = 2.2684e-01, PNorm = 42.0002, GNorm = 2.2115, lr_0 = 9.9663e-04
Loss = 2.3510e-01, PNorm = 42.0310, GNorm = 5.1864, lr_0 = 9.9993e-04
Validation mae = 0.513790
Epoch 2
Loss = 2.1488e-01, PNorm = 42.0645, GNorm = 2.7781, lr_0 = 9.9925e-04
Loss = 2.2051e-01, PNorm = 42.0904, GNorm = 4.1717, lr_0 = 9.9856e-04
Loss = 2.2165e-01, PNorm = 42.1055, GNorm = 3.2141, lr_0 = 9.9788e-04
Loss = 2.1297e-01, PNorm = 42.1321, GNorm = 5.9368, lr_0 = 9.9719e-04
Loss = 2.4888e-01, PNorm = 42.1693, GNorm = 5.7918, lr_0 = 9.9651e-04
Loss = 2.6032e-01, PNorm = 42.2161, GNorm = 2.9794, lr_0 = 9.9583e-04
Loss = 2.2966e-01, PNorm = 42.2643, GNorm = 4.0227, lr_0 = 9.9515e-04
Loss = 2.2733e-01, PNorm = 42.3019, GNorm = 0.8159, lr_0 = 9.9446e-04
Loss = 2.2792e-01, PNorm = 42.3329, GNorm = 2.5863, lr_0 = 9.9378e-04
Loss = 2.5790e-01, PNorm = 42.3630, GNorm = 1.1154, lr_0 = 9.9310e-04
Loss = 2.2028e-01, PNorm = 42.3866, GNorm = 4.7681, lr_0 = 9.9242e-04
Loss = 2.3591e-01, PNorm = 42.4229, GNorm = 1.8114, lr_0 = 9.9174e-04
Loss = 2.1388e-01, PNorm = 42.4501, GNorm = 2.6602, lr_0 = 9.9106e-04
Loss = 2.2011e-01, PNorm = 42.4754, GNorm = 7.3703, lr_0 = 9.9038e-04
Loss = 2.3166e-01, PNorm = 42.4975, GNorm = 1.4820, lr_0 = 9.8971e-04
Loss = 2.1292e-01, PNorm = 42.5292, GNorm = 1.3586, lr_0 = 9.8903e-04
Loss = 1.8426e-01, PNorm = 42.5656, GNorm = 0.6789, lr_0 = 9.8835e-04
Loss = 1.9343e-01, PNorm = 42.5927, GNorm = 1.2848, lr_0 = 9.8767e-04
Loss = 1.8133e-01, PNorm = 42.6189, GNorm = 4.3392, lr_0 = 9.8700e-04
Loss = 1.7872e-01, PNorm = 42.6320, GNorm = 1.2593, lr_0 = 9.8632e-04
Loss = 1.7543e-01, PNorm = 42.6561, GNorm = 2.6851, lr_0 = 9.8564e-04
Loss = 2.1693e-01, PNorm = 42.6805, GNorm = 1.3069, lr_0 = 9.8497e-04
Loss = 2.0501e-01, PNorm = 42.7060, GNorm = 3.9062, lr_0 = 9.8429e-04
Loss = 1.8165e-01, PNorm = 42.7286, GNorm = 1.8907, lr_0 = 9.8362e-04
Loss = 2.0495e-01, PNorm = 42.7484, GNorm = 4.1333, lr_0 = 9.8295e-04
Loss = 1.8918e-01, PNorm = 42.7710, GNorm = 2.6583, lr_0 = 9.8227e-04
Loss = 2.1494e-01, PNorm = 42.8031, GNorm = 0.7186, lr_0 = 9.8160e-04
Loss = 2.0091e-01, PNorm = 42.8367, GNorm = 6.9709, lr_0 = 9.8093e-04
Loss = 2.1997e-01, PNorm = 42.8694, GNorm = 4.1982, lr_0 = 9.8026e-04
Loss = 2.4909e-01, PNorm = 42.9029, GNorm = 4.2952, lr_0 = 9.7958e-04
Loss = 2.2540e-01, PNorm = 42.9346, GNorm = 4.9706, lr_0 = 9.7891e-04
Loss = 2.0821e-01, PNorm = 42.9641, GNorm = 2.8594, lr_0 = 9.7824e-04
Loss = 1.8901e-01, PNorm = 42.9898, GNorm = 0.8036, lr_0 = 9.7757e-04
Loss = 1.9529e-01, PNorm = 43.0131, GNorm = 1.4953, lr_0 = 9.7690e-04
Loss = 1.9051e-01, PNorm = 43.0386, GNorm = 5.6842, lr_0 = 9.7623e-04
Loss = 2.1413e-01, PNorm = 43.0697, GNorm = 1.1212, lr_0 = 9.7556e-04
Loss = 2.2113e-01, PNorm = 43.1060, GNorm = 1.0282, lr_0 = 9.7490e-04
Loss = 1.9838e-01, PNorm = 43.1346, GNorm = 2.4167, lr_0 = 9.7423e-04
Loss = 2.1153e-01, PNorm = 43.1634, GNorm = 3.4808, lr_0 = 9.7356e-04
Loss = 2.2789e-01, PNorm = 43.1833, GNorm = 1.3645, lr_0 = 9.7289e-04
Loss = 1.8593e-01, PNorm = 43.2034, GNorm = 2.1751, lr_0 = 9.7223e-04
Loss = 2.0680e-01, PNorm = 43.2257, GNorm = 3.1926, lr_0 = 9.7156e-04
Loss = 1.9473e-01, PNorm = 43.2476, GNorm = 1.9998, lr_0 = 9.7090e-04
Loss = 1.9906e-01, PNorm = 43.2736, GNorm = 2.6013, lr_0 = 9.7023e-04
Loss = 2.0356e-01, PNorm = 43.2930, GNorm = 2.3917, lr_0 = 9.6957e-04
Loss = 1.8387e-01, PNorm = 43.3076, GNorm = 2.5710, lr_0 = 9.6890e-04
Loss = 2.2788e-01, PNorm = 43.3363, GNorm = 2.4883, lr_0 = 9.6824e-04
Loss = 2.4121e-01, PNorm = 43.3655, GNorm = 0.6908, lr_0 = 9.6757e-04
Loss = 2.2861e-01, PNorm = 43.3972, GNorm = 6.0715, lr_0 = 9.6691e-04
Loss = 2.0582e-01, PNorm = 43.4295, GNorm = 1.2802, lr_0 = 9.6625e-04
Loss = 2.0564e-01, PNorm = 43.4542, GNorm = 3.2986, lr_0 = 9.6559e-04
Loss = 1.9043e-01, PNorm = 43.4765, GNorm = 1.1144, lr_0 = 9.6493e-04
Loss = 2.0338e-01, PNorm = 43.4995, GNorm = 2.1266, lr_0 = 9.6427e-04
Loss = 1.9447e-01, PNorm = 43.5344, GNorm = 1.0520, lr_0 = 9.6360e-04
Loss = 2.0060e-01, PNorm = 43.5662, GNorm = 2.4908, lr_0 = 9.6294e-04
Loss = 1.8325e-01, PNorm = 43.6082, GNorm = 3.8816, lr_0 = 9.6228e-04
Loss = 1.9747e-01, PNorm = 43.6363, GNorm = 2.0720, lr_0 = 9.6163e-04
Loss = 1.8503e-01, PNorm = 43.6613, GNorm = 1.6384, lr_0 = 9.6097e-04
Loss = 1.7046e-01, PNorm = 43.6757, GNorm = 1.6118, lr_0 = 9.6031e-04
Loss = 1.7257e-01, PNorm = 43.6846, GNorm = 1.9589, lr_0 = 9.5965e-04
Loss = 1.8916e-01, PNorm = 43.7037, GNorm = 1.5530, lr_0 = 9.5899e-04
Loss = 2.0121e-01, PNorm = 43.7356, GNorm = 1.8661, lr_0 = 9.5834e-04
Loss = 2.0408e-01, PNorm = 43.7711, GNorm = 4.4162, lr_0 = 9.5768e-04
Loss = 1.6271e-01, PNorm = 43.7962, GNorm = 1.2866, lr_0 = 9.5702e-04
Loss = 1.8864e-01, PNorm = 43.8145, GNorm = 6.2241, lr_0 = 9.5637e-04
Loss = 2.5158e-01, PNorm = 43.8434, GNorm = 6.9808, lr_0 = 9.5571e-04
Loss = 1.9777e-01, PNorm = 43.8810, GNorm = 0.9954, lr_0 = 9.5506e-04
Loss = 2.2739e-01, PNorm = 43.9158, GNorm = 2.2412, lr_0 = 9.5440e-04
Loss = 2.2224e-01, PNorm = 43.9535, GNorm = 3.2369, lr_0 = 9.5375e-04
Loss = 2.1385e-01, PNorm = 43.9774, GNorm = 1.6160, lr_0 = 9.5310e-04
Loss = 2.0449e-01, PNorm = 43.9980, GNorm = 1.6937, lr_0 = 9.5244e-04
Loss = 1.8618e-01, PNorm = 44.0301, GNorm = 1.4005, lr_0 = 9.5179e-04
Loss = 1.8773e-01, PNorm = 44.0623, GNorm = 2.6821, lr_0 = 9.5114e-04
Loss = 2.0475e-01, PNorm = 44.0895, GNorm = 1.4454, lr_0 = 9.5049e-04
Loss = 1.9831e-01, PNorm = 44.1134, GNorm = 1.9786, lr_0 = 9.4984e-04
Loss = 2.0164e-01, PNorm = 44.1357, GNorm = 2.0330, lr_0 = 9.4919e-04
Loss = 1.7540e-01, PNorm = 44.1604, GNorm = 2.2917, lr_0 = 9.4854e-04
Loss = 2.7400e-01, PNorm = 44.1924, GNorm = 3.0102, lr_0 = 9.4789e-04
Loss = 2.1509e-01, PNorm = 44.2243, GNorm = 4.2699, lr_0 = 9.4724e-04
Loss = 2.0732e-01, PNorm = 44.2617, GNorm = 3.4850, lr_0 = 9.4659e-04
Loss = 1.8179e-01, PNorm = 44.2944, GNorm = 1.2070, lr_0 = 9.4594e-04
Loss = 2.1326e-01, PNorm = 44.3151, GNorm = 3.5548, lr_0 = 9.4529e-04
Loss = 2.1268e-01, PNorm = 44.3501, GNorm = 1.1137, lr_0 = 9.4464e-04
Loss = 2.0713e-01, PNorm = 44.3738, GNorm = 1.2419, lr_0 = 9.4400e-04
Loss = 2.0641e-01, PNorm = 44.3971, GNorm = 3.4120, lr_0 = 9.4335e-04
Loss = 1.6761e-01, PNorm = 44.4313, GNorm = 3.0537, lr_0 = 9.4270e-04
Loss = 1.4600e-01, PNorm = 44.4570, GNorm = 0.9890, lr_0 = 9.4206e-04
Loss = 1.9507e-01, PNorm = 44.4675, GNorm = 0.7879, lr_0 = 9.4141e-04
Loss = 1.7849e-01, PNorm = 44.4938, GNorm = 1.7149, lr_0 = 9.4077e-04
Loss = 1.8423e-01, PNorm = 44.5156, GNorm = 1.2884, lr_0 = 9.4012e-04
Loss = 2.1316e-01, PNorm = 44.5415, GNorm = 0.9972, lr_0 = 9.3948e-04
Loss = 1.5490e-01, PNorm = 44.5666, GNorm = 1.8818, lr_0 = 9.3884e-04
Loss = 1.7534e-01, PNorm = 44.5915, GNorm = 2.0740, lr_0 = 9.3819e-04
Loss = 1.9771e-01, PNorm = 44.6284, GNorm = 1.1918, lr_0 = 9.3755e-04
Loss = 2.1309e-01, PNorm = 44.6580, GNorm = 2.5406, lr_0 = 9.3691e-04
Loss = 2.0059e-01, PNorm = 44.6798, GNorm = 2.4975, lr_0 = 9.3627e-04
Loss = 1.9144e-01, PNorm = 44.7081, GNorm = 1.6109, lr_0 = 9.3562e-04
Loss = 1.9262e-01, PNorm = 44.7308, GNorm = 1.1767, lr_0 = 9.3498e-04
Loss = 2.0268e-01, PNorm = 44.7564, GNorm = 1.0360, lr_0 = 9.3434e-04
Loss = 1.5029e-01, PNorm = 44.7731, GNorm = 0.8311, lr_0 = 9.3370e-04
Loss = 1.5025e-01, PNorm = 44.7915, GNorm = 0.8497, lr_0 = 9.3306e-04
Loss = 1.7147e-01, PNorm = 44.8096, GNorm = 0.5899, lr_0 = 9.3242e-04
Loss = 1.7166e-01, PNorm = 44.8353, GNorm = 2.7134, lr_0 = 9.3178e-04
Loss = 1.7872e-01, PNorm = 44.8578, GNorm = 2.0731, lr_0 = 9.3115e-04
Loss = 1.6861e-01, PNorm = 44.8857, GNorm = 0.9815, lr_0 = 9.3051e-04
Loss = 1.6886e-01, PNorm = 44.9129, GNorm = 2.5385, lr_0 = 9.2987e-04
Loss = 2.1742e-01, PNorm = 44.9378, GNorm = 3.1499, lr_0 = 9.2923e-04
Loss = 1.8246e-01, PNorm = 44.9628, GNorm = 4.5832, lr_0 = 9.2860e-04
Loss = 2.2722e-01, PNorm = 44.9865, GNorm = 0.8527, lr_0 = 9.2796e-04
Loss = 1.9981e-01, PNorm = 45.0137, GNorm = 1.0022, lr_0 = 9.2733e-04
Loss = 1.6854e-01, PNorm = 45.0391, GNorm = 1.7304, lr_0 = 9.2669e-04
Loss = 1.7485e-01, PNorm = 45.0583, GNorm = 1.9698, lr_0 = 9.2606e-04
Loss = 1.7807e-01, PNorm = 45.0834, GNorm = 0.9328, lr_0 = 9.2542e-04
Loss = 1.6931e-01, PNorm = 45.1107, GNorm = 1.4445, lr_0 = 9.2479e-04
Loss = 1.7250e-01, PNorm = 45.1255, GNorm = 0.9588, lr_0 = 9.2415e-04
Loss = 1.9546e-01, PNorm = 45.1435, GNorm = 1.0496, lr_0 = 9.2352e-04
Loss = 1.7269e-01, PNorm = 45.1732, GNorm = 3.0023, lr_0 = 9.2289e-04
Loss = 1.8778e-01, PNorm = 45.2068, GNorm = 1.9451, lr_0 = 9.2226e-04
Loss = 1.9439e-01, PNorm = 45.2278, GNorm = 2.7601, lr_0 = 9.2162e-04
Loss = 1.6531e-01, PNorm = 45.2571, GNorm = 1.6642, lr_0 = 9.2099e-04
Validation mae = 0.478772
Epoch 3
Loss = 1.8303e-01, PNorm = 45.2910, GNorm = 1.6134, lr_0 = 9.2036e-04
Loss = 1.6805e-01, PNorm = 45.3203, GNorm = 2.6759, lr_0 = 9.1973e-04
Loss = 1.6139e-01, PNorm = 45.3387, GNorm = 0.9057, lr_0 = 9.1910e-04
Loss = 1.7305e-01, PNorm = 45.3493, GNorm = 2.2201, lr_0 = 9.1847e-04
Loss = 1.6542e-01, PNorm = 45.3712, GNorm = 1.1185, lr_0 = 9.1784e-04
Loss = 1.8179e-01, PNorm = 45.3999, GNorm = 2.0595, lr_0 = 9.1721e-04
Loss = 1.7771e-01, PNorm = 45.4225, GNorm = 2.0241, lr_0 = 9.1658e-04
Loss = 1.6088e-01, PNorm = 45.4470, GNorm = 1.7828, lr_0 = 9.1596e-04
Loss = 1.7236e-01, PNorm = 45.4816, GNorm = 2.2243, lr_0 = 9.1533e-04
Loss = 2.1089e-01, PNorm = 45.5089, GNorm = 1.0043, lr_0 = 9.1470e-04
Loss = 1.9452e-01, PNorm = 45.5378, GNorm = 2.2171, lr_0 = 9.1408e-04
Loss = 1.6218e-01, PNorm = 45.5731, GNorm = 1.2247, lr_0 = 9.1345e-04
Loss = 1.6479e-01, PNorm = 45.6034, GNorm = 1.0982, lr_0 = 9.1282e-04
Loss = 1.8431e-01, PNorm = 45.6172, GNorm = 0.9548, lr_0 = 9.1220e-04
Loss = 1.7359e-01, PNorm = 45.6367, GNorm = 0.7946, lr_0 = 9.1157e-04
Loss = 1.7756e-01, PNorm = 45.6527, GNorm = 1.6995, lr_0 = 9.1095e-04
Loss = 1.6937e-01, PNorm = 45.6720, GNorm = 0.8715, lr_0 = 9.1032e-04
Loss = 1.8644e-01, PNorm = 45.6933, GNorm = 3.1254, lr_0 = 9.0970e-04
Loss = 1.8793e-01, PNorm = 45.7253, GNorm = 1.0886, lr_0 = 9.0908e-04
Loss = 1.7962e-01, PNorm = 45.7566, GNorm = 1.7378, lr_0 = 9.0846e-04
Loss = 1.7423e-01, PNorm = 45.7682, GNorm = 2.0228, lr_0 = 9.0783e-04
Loss = 1.9064e-01, PNorm = 45.7843, GNorm = 4.6920, lr_0 = 9.0721e-04
Loss = 1.7757e-01, PNorm = 45.8067, GNorm = 1.0187, lr_0 = 9.0659e-04
Loss = 1.5969e-01, PNorm = 45.8414, GNorm = 0.7792, lr_0 = 9.0597e-04
Loss = 1.5289e-01, PNorm = 45.8645, GNorm = 1.5835, lr_0 = 9.0535e-04
Loss = 1.5541e-01, PNorm = 45.8783, GNorm = 2.6607, lr_0 = 9.0473e-04
Loss = 1.8407e-01, PNorm = 45.9021, GNorm = 2.2253, lr_0 = 9.0411e-04
Loss = 1.7705e-01, PNorm = 45.9280, GNorm = 1.8336, lr_0 = 9.0349e-04
Loss = 2.0498e-01, PNorm = 45.9436, GNorm = 2.1979, lr_0 = 9.0287e-04
Loss = 1.4666e-01, PNorm = 45.9616, GNorm = 1.3486, lr_0 = 9.0225e-04
Loss = 1.5746e-01, PNorm = 45.9926, GNorm = 1.2310, lr_0 = 9.0163e-04
Loss = 1.8129e-01, PNorm = 46.0080, GNorm = 2.0078, lr_0 = 9.0102e-04
Loss = 2.1500e-01, PNorm = 46.0290, GNorm = 1.8957, lr_0 = 9.0040e-04
Loss = 1.7522e-01, PNorm = 46.0553, GNorm = 2.1911, lr_0 = 8.9978e-04
Loss = 1.7578e-01, PNorm = 46.0844, GNorm = 3.1075, lr_0 = 8.9916e-04
Loss = 1.8110e-01, PNorm = 46.1126, GNorm = 0.8996, lr_0 = 8.9855e-04
Loss = 1.9096e-01, PNorm = 46.1365, GNorm = 2.8696, lr_0 = 8.9793e-04
Loss = 1.6949e-01, PNorm = 46.1681, GNorm = 1.2249, lr_0 = 8.9732e-04
Loss = 1.6635e-01, PNorm = 46.2072, GNorm = 1.3922, lr_0 = 8.9670e-04
Loss = 1.8677e-01, PNorm = 46.2357, GNorm = 1.5975, lr_0 = 8.9609e-04
Loss = 1.7042e-01, PNorm = 46.2574, GNorm = 0.8220, lr_0 = 8.9548e-04
Loss = 1.6240e-01, PNorm = 46.2818, GNorm = 1.2808, lr_0 = 8.9486e-04
Loss = 1.5418e-01, PNorm = 46.3066, GNorm = 1.6802, lr_0 = 8.9425e-04
Loss = 1.5610e-01, PNorm = 46.3291, GNorm = 2.3630, lr_0 = 8.9364e-04
Loss = 1.4422e-01, PNorm = 46.3502, GNorm = 1.3307, lr_0 = 8.9302e-04
Loss = 1.7608e-01, PNorm = 46.3724, GNorm = 1.8421, lr_0 = 8.9241e-04
Loss = 1.8786e-01, PNorm = 46.3916, GNorm = 1.3228, lr_0 = 8.9180e-04
Loss = 2.2023e-01, PNorm = 46.4041, GNorm = 2.8064, lr_0 = 8.9119e-04
Loss = 2.0622e-01, PNorm = 46.4375, GNorm = 2.2488, lr_0 = 8.9058e-04
Loss = 2.1270e-01, PNorm = 46.4803, GNorm = 1.7089, lr_0 = 8.8997e-04
Loss = 1.8348e-01, PNorm = 46.5074, GNorm = 1.4248, lr_0 = 8.8936e-04
Loss = 1.6046e-01, PNorm = 46.5308, GNorm = 1.1908, lr_0 = 8.8875e-04
Loss = 1.7367e-01, PNorm = 46.5500, GNorm = 4.1929, lr_0 = 8.8814e-04
Loss = 1.5318e-01, PNorm = 46.5732, GNorm = 1.5803, lr_0 = 8.8753e-04
Loss = 1.7148e-01, PNorm = 46.5847, GNorm = 1.2329, lr_0 = 8.8693e-04
Loss = 2.0098e-01, PNorm = 46.6064, GNorm = 2.0793, lr_0 = 8.8632e-04
Loss = 1.9098e-01, PNorm = 46.6271, GNorm = 0.7018, lr_0 = 8.8571e-04
Loss = 1.8034e-01, PNorm = 46.6586, GNorm = 1.4956, lr_0 = 8.8510e-04
Loss = 1.6416e-01, PNorm = 46.6773, GNorm = 2.4402, lr_0 = 8.8450e-04
Loss = 1.6298e-01, PNorm = 46.6974, GNorm = 0.9462, lr_0 = 8.8389e-04
Loss = 1.5858e-01, PNorm = 46.7176, GNorm = 0.7322, lr_0 = 8.8329e-04
Loss = 1.6591e-01, PNorm = 46.7389, GNorm = 1.9497, lr_0 = 8.8268e-04
Loss = 1.5016e-01, PNorm = 46.7557, GNorm = 1.3481, lr_0 = 8.8208e-04
Loss = 1.6051e-01, PNorm = 46.7703, GNorm = 2.2628, lr_0 = 8.8147e-04
Loss = 1.4414e-01, PNorm = 46.7896, GNorm = 0.6570, lr_0 = 8.8087e-04
Loss = 1.7544e-01, PNorm = 46.8142, GNorm = 1.0800, lr_0 = 8.8026e-04
Loss = 1.8758e-01, PNorm = 46.8418, GNorm = 1.4394, lr_0 = 8.7966e-04
Loss = 1.6223e-01, PNorm = 46.8683, GNorm = 0.8859, lr_0 = 8.7906e-04
Loss = 1.8640e-01, PNorm = 46.8858, GNorm = 1.5578, lr_0 = 8.7846e-04
Loss = 1.6889e-01, PNorm = 46.9042, GNorm = 0.6932, lr_0 = 8.7785e-04
Loss = 1.6357e-01, PNorm = 46.9289, GNorm = 0.8515, lr_0 = 8.7725e-04
Loss = 1.5450e-01, PNorm = 46.9549, GNorm = 1.1408, lr_0 = 8.7665e-04
Loss = 1.5669e-01, PNorm = 46.9697, GNorm = 2.5742, lr_0 = 8.7605e-04
Loss = 1.6002e-01, PNorm = 46.9975, GNorm = 1.2788, lr_0 = 8.7545e-04
Loss = 1.8065e-01, PNorm = 47.0240, GNorm = 0.9504, lr_0 = 8.7485e-04
Loss = 1.5533e-01, PNorm = 47.0445, GNorm = 2.7039, lr_0 = 8.7425e-04
Loss = 1.4344e-01, PNorm = 47.0611, GNorm = 2.0364, lr_0 = 8.7365e-04
Loss = 1.5065e-01, PNorm = 47.0829, GNorm = 3.2943, lr_0 = 8.7306e-04
Loss = 1.8364e-01, PNorm = 47.1101, GNorm = 2.2104, lr_0 = 8.7246e-04
Loss = 1.7921e-01, PNorm = 47.1368, GNorm = 2.1955, lr_0 = 8.7186e-04
Loss = 1.8034e-01, PNorm = 47.1581, GNorm = 1.3709, lr_0 = 8.7126e-04
Loss = 1.6168e-01, PNorm = 47.1739, GNorm = 2.9726, lr_0 = 8.7067e-04
Loss = 1.5702e-01, PNorm = 47.1986, GNorm = 1.9958, lr_0 = 8.7007e-04
Loss = 1.6009e-01, PNorm = 47.2219, GNorm = 1.0583, lr_0 = 8.6947e-04
Loss = 1.6782e-01, PNorm = 47.2542, GNorm = 2.2993, lr_0 = 8.6888e-04
Loss = 1.6127e-01, PNorm = 47.2827, GNorm = 0.7739, lr_0 = 8.6828e-04
Loss = 1.4060e-01, PNorm = 47.3083, GNorm = 0.5995, lr_0 = 8.6769e-04
Loss = 1.5969e-01, PNorm = 47.3309, GNorm = 1.4520, lr_0 = 8.6709e-04
Loss = 1.3896e-01, PNorm = 47.3460, GNorm = 1.1064, lr_0 = 8.6650e-04
Loss = 1.7704e-01, PNorm = 47.3682, GNorm = 0.6180, lr_0 = 8.6590e-04
Loss = 1.6833e-01, PNorm = 47.3882, GNorm = 2.1294, lr_0 = 8.6531e-04
Loss = 1.5673e-01, PNorm = 47.4066, GNorm = 0.5349, lr_0 = 8.6472e-04
Loss = 1.8322e-01, PNorm = 47.4269, GNorm = 4.2691, lr_0 = 8.6413e-04
Loss = 1.5152e-01, PNorm = 47.4433, GNorm = 0.8698, lr_0 = 8.6353e-04
Loss = 1.4904e-01, PNorm = 47.4571, GNorm = 1.0718, lr_0 = 8.6294e-04
Loss = 1.5781e-01, PNorm = 47.4749, GNorm = 1.1370, lr_0 = 8.6235e-04
Loss = 1.6158e-01, PNorm = 47.4941, GNorm = 2.3877, lr_0 = 8.6176e-04
Loss = 1.7430e-01, PNorm = 47.5226, GNorm = 0.8421, lr_0 = 8.6117e-04
Loss = 1.5554e-01, PNorm = 47.5419, GNorm = 1.8605, lr_0 = 8.6058e-04
Loss = 1.4957e-01, PNorm = 47.5698, GNorm = 1.0460, lr_0 = 8.5999e-04
Loss = 1.6601e-01, PNorm = 47.5988, GNorm = 0.9382, lr_0 = 8.5940e-04
Loss = 1.3470e-01, PNorm = 47.6221, GNorm = 0.8288, lr_0 = 8.5881e-04
Loss = 1.4806e-01, PNorm = 47.6431, GNorm = 1.9427, lr_0 = 8.5823e-04
Loss = 1.4525e-01, PNorm = 47.6579, GNorm = 0.8797, lr_0 = 8.5764e-04
Loss = 1.6014e-01, PNorm = 47.6676, GNorm = 1.1135, lr_0 = 8.5705e-04
Loss = 1.6037e-01, PNorm = 47.6891, GNorm = 3.1786, lr_0 = 8.5646e-04
Loss = 1.7728e-01, PNorm = 47.7029, GNorm = 2.4217, lr_0 = 8.5588e-04
Loss = 1.8286e-01, PNorm = 47.7268, GNorm = 1.0508, lr_0 = 8.5529e-04
Loss = 1.7689e-01, PNorm = 47.7493, GNorm = 0.6175, lr_0 = 8.5470e-04
Loss = 1.4794e-01, PNorm = 47.7738, GNorm = 1.0722, lr_0 = 8.5412e-04
Loss = 1.5629e-01, PNorm = 47.7959, GNorm = 0.9050, lr_0 = 8.5353e-04
Loss = 1.7414e-01, PNorm = 47.8112, GNorm = 2.7064, lr_0 = 8.5295e-04
Loss = 2.2390e-01, PNorm = 47.8331, GNorm = 3.9308, lr_0 = 8.5236e-04
Loss = 1.8153e-01, PNorm = 47.8564, GNorm = 0.7568, lr_0 = 8.5178e-04
Loss = 1.6315e-01, PNorm = 47.8930, GNorm = 1.1432, lr_0 = 8.5120e-04
Loss = 1.7742e-01, PNorm = 47.9118, GNorm = 2.3258, lr_0 = 8.5061e-04
Loss = 1.8432e-01, PNorm = 47.9310, GNorm = 0.8006, lr_0 = 8.5003e-04
Loss = 1.8810e-01, PNorm = 47.9561, GNorm = 0.6097, lr_0 = 8.4945e-04
Loss = 1.3990e-01, PNorm = 47.9770, GNorm = 0.6668, lr_0 = 8.4887e-04
Loss = 1.3491e-01, PNorm = 48.0009, GNorm = 1.0895, lr_0 = 8.4828e-04
Validation mae = 0.460423
Epoch 4
Loss = 1.6614e-01, PNorm = 48.0147, GNorm = 1.4758, lr_0 = 8.4770e-04
Loss = 1.5782e-01, PNorm = 48.0321, GNorm = 0.9863, lr_0 = 8.4712e-04
Loss = 1.2754e-01, PNorm = 48.0500, GNorm = 1.6329, lr_0 = 8.4654e-04
Loss = 1.3747e-01, PNorm = 48.0628, GNorm = 1.3287, lr_0 = 8.4596e-04
Loss = 1.4641e-01, PNorm = 48.0881, GNorm = 0.5495, lr_0 = 8.4538e-04
Loss = 1.6361e-01, PNorm = 48.1099, GNorm = 1.6324, lr_0 = 8.4480e-04
Loss = 1.6232e-01, PNorm = 48.1313, GNorm = 1.9687, lr_0 = 8.4423e-04
Loss = 1.5368e-01, PNorm = 48.1584, GNorm = 0.6212, lr_0 = 8.4365e-04
Loss = 1.7936e-01, PNorm = 48.1828, GNorm = 1.3214, lr_0 = 8.4307e-04
Loss = 1.5084e-01, PNorm = 48.2042, GNorm = 1.0048, lr_0 = 8.4249e-04
Loss = 1.4643e-01, PNorm = 48.2304, GNorm = 1.1812, lr_0 = 8.4191e-04
Loss = 1.5817e-01, PNorm = 48.2582, GNorm = 2.1291, lr_0 = 8.4134e-04
Loss = 1.6090e-01, PNorm = 48.2773, GNorm = 0.6710, lr_0 = 8.4076e-04
Loss = 1.5730e-01, PNorm = 48.3002, GNorm = 1.6407, lr_0 = 8.4019e-04
Loss = 1.6812e-01, PNorm = 48.3244, GNorm = 1.1655, lr_0 = 8.3961e-04
Loss = 1.5236e-01, PNorm = 48.3529, GNorm = 0.8853, lr_0 = 8.3903e-04
Loss = 1.4473e-01, PNorm = 48.3700, GNorm = 0.9655, lr_0 = 8.3846e-04
Loss = 1.4139e-01, PNorm = 48.3885, GNorm = 1.2444, lr_0 = 8.3789e-04
Loss = 1.3432e-01, PNorm = 48.4098, GNorm = 2.4573, lr_0 = 8.3731e-04
Loss = 1.4621e-01, PNorm = 48.4388, GNorm = 0.7436, lr_0 = 8.3674e-04
Loss = 1.3353e-01, PNorm = 48.4617, GNorm = 1.1139, lr_0 = 8.3616e-04
Loss = 1.5219e-01, PNorm = 48.4840, GNorm = 1.2369, lr_0 = 8.3559e-04
Loss = 1.4604e-01, PNorm = 48.5060, GNorm = 2.5222, lr_0 = 8.3502e-04
Loss = 1.5322e-01, PNorm = 48.5268, GNorm = 0.5899, lr_0 = 8.3445e-04
Loss = 1.4775e-01, PNorm = 48.5479, GNorm = 1.1186, lr_0 = 8.3388e-04
Loss = 1.4420e-01, PNorm = 48.5666, GNorm = 0.4588, lr_0 = 8.3330e-04
Loss = 1.4324e-01, PNorm = 48.5842, GNorm = 0.9067, lr_0 = 8.3273e-04
Loss = 1.8040e-01, PNorm = 48.6066, GNorm = 0.7671, lr_0 = 8.3216e-04
Loss = 1.6922e-01, PNorm = 48.6345, GNorm = 0.8030, lr_0 = 8.3159e-04
Loss = 1.4467e-01, PNorm = 48.6579, GNorm = 0.7468, lr_0 = 8.3102e-04
Loss = 1.6659e-01, PNorm = 48.6787, GNorm = 1.0211, lr_0 = 8.3045e-04
Loss = 1.6503e-01, PNorm = 48.7000, GNorm = 4.0972, lr_0 = 8.2988e-04
Loss = 1.5024e-01, PNorm = 48.7225, GNorm = 0.9737, lr_0 = 8.2932e-04
Loss = 1.4005e-01, PNorm = 48.7459, GNorm = 0.9798, lr_0 = 8.2875e-04
Loss = 1.4827e-01, PNorm = 48.7633, GNorm = 0.6163, lr_0 = 8.2818e-04
Loss = 1.3276e-01, PNorm = 48.7882, GNorm = 0.8468, lr_0 = 8.2761e-04
Loss = 1.4774e-01, PNorm = 48.8134, GNorm = 0.7643, lr_0 = 8.2705e-04
Loss = 1.5445e-01, PNorm = 48.8327, GNorm = 1.0944, lr_0 = 8.2648e-04
Loss = 1.7281e-01, PNorm = 48.8491, GNorm = 1.7885, lr_0 = 8.2591e-04
Loss = 1.4797e-01, PNorm = 48.8761, GNorm = 1.1265, lr_0 = 8.2535e-04
Loss = 1.4822e-01, PNorm = 48.9075, GNorm = 0.8630, lr_0 = 8.2478e-04
Loss = 1.3759e-01, PNorm = 48.9294, GNorm = 0.6396, lr_0 = 8.2422e-04
Loss = 1.3818e-01, PNorm = 48.9526, GNorm = 0.9354, lr_0 = 8.2365e-04
Loss = 1.3562e-01, PNorm = 48.9701, GNorm = 0.7931, lr_0 = 8.2309e-04
Loss = 1.4124e-01, PNorm = 48.9860, GNorm = 0.9039, lr_0 = 8.2252e-04
Loss = 1.4882e-01, PNorm = 49.0029, GNorm = 0.6276, lr_0 = 8.2196e-04
Loss = 1.3771e-01, PNorm = 49.0201, GNorm = 1.1243, lr_0 = 8.2140e-04
Loss = 1.5481e-01, PNorm = 49.0388, GNorm = 0.6900, lr_0 = 8.2084e-04
Loss = 1.4189e-01, PNorm = 49.0600, GNorm = 1.4622, lr_0 = 8.2027e-04
Loss = 1.7966e-01, PNorm = 49.0834, GNorm = 2.1770, lr_0 = 8.1971e-04
Loss = 1.5282e-01, PNorm = 49.1032, GNorm = 1.1233, lr_0 = 8.1915e-04
Loss = 1.4827e-01, PNorm = 49.1248, GNorm = 0.8740, lr_0 = 8.1859e-04
Loss = 1.2889e-01, PNorm = 49.1390, GNorm = 1.2577, lr_0 = 8.1803e-04
Loss = 1.3044e-01, PNorm = 49.1594, GNorm = 1.6517, lr_0 = 8.1747e-04
Loss = 1.5103e-01, PNorm = 49.1818, GNorm = 1.2092, lr_0 = 8.1691e-04
Loss = 1.5315e-01, PNorm = 49.2137, GNorm = 1.8984, lr_0 = 8.1635e-04
Loss = 1.3518e-01, PNorm = 49.2377, GNorm = 0.5106, lr_0 = 8.1579e-04
Loss = 1.2810e-01, PNorm = 49.2484, GNorm = 1.1438, lr_0 = 8.1523e-04
Loss = 1.3480e-01, PNorm = 49.2592, GNorm = 0.8461, lr_0 = 8.1467e-04
Loss = 1.4115e-01, PNorm = 49.2776, GNorm = 1.1195, lr_0 = 8.1411e-04
Loss = 1.3514e-01, PNorm = 49.3003, GNorm = 1.2953, lr_0 = 8.1355e-04
Loss = 1.7903e-01, PNorm = 49.3218, GNorm = 1.7044, lr_0 = 8.1300e-04
Loss = 1.5256e-01, PNorm = 49.3431, GNorm = 1.4361, lr_0 = 8.1244e-04
Loss = 1.3673e-01, PNorm = 49.3650, GNorm = 1.1415, lr_0 = 8.1188e-04
Loss = 1.5481e-01, PNorm = 49.3807, GNorm = 2.0894, lr_0 = 8.1133e-04
Loss = 1.7535e-01, PNorm = 49.4042, GNorm = 1.0051, lr_0 = 8.1077e-04
Loss = 1.4464e-01, PNorm = 49.4270, GNorm = 1.9614, lr_0 = 8.1022e-04
Loss = 1.5656e-01, PNorm = 49.4443, GNorm = 0.5352, lr_0 = 8.0966e-04
Loss = 1.6222e-01, PNorm = 49.4657, GNorm = 2.2754, lr_0 = 8.0911e-04
Loss = 1.7076e-01, PNorm = 49.4828, GNorm = 1.1398, lr_0 = 8.0855e-04
Loss = 1.5960e-01, PNorm = 49.5031, GNorm = 0.6700, lr_0 = 8.0800e-04
Loss = 1.3736e-01, PNorm = 49.5205, GNorm = 3.1014, lr_0 = 8.0745e-04
Loss = 1.6769e-01, PNorm = 49.5359, GNorm = 0.9319, lr_0 = 8.0689e-04
Loss = 1.6707e-01, PNorm = 49.5628, GNorm = 1.5058, lr_0 = 8.0634e-04
Loss = 1.4395e-01, PNorm = 49.5884, GNorm = 1.0844, lr_0 = 8.0579e-04
Loss = 1.5484e-01, PNorm = 49.6083, GNorm = 0.9780, lr_0 = 8.0523e-04
Loss = 1.4100e-01, PNorm = 49.6237, GNorm = 1.2010, lr_0 = 8.0468e-04
Loss = 1.5557e-01, PNorm = 49.6396, GNorm = 0.8640, lr_0 = 8.0413e-04
Loss = 1.5126e-01, PNorm = 49.6575, GNorm = 2.4067, lr_0 = 8.0358e-04
Loss = 1.7304e-01, PNorm = 49.6762, GNorm = 0.8246, lr_0 = 8.0303e-04
Loss = 1.4513e-01, PNorm = 49.7020, GNorm = 1.0673, lr_0 = 8.0248e-04
Loss = 1.5444e-01, PNorm = 49.7272, GNorm = 0.4920, lr_0 = 8.0193e-04
Loss = 1.4543e-01, PNorm = 49.7443, GNorm = 2.9717, lr_0 = 8.0138e-04
Loss = 1.8214e-01, PNorm = 49.7623, GNorm = 2.0076, lr_0 = 8.0083e-04
Loss = 1.7100e-01, PNorm = 49.7850, GNorm = 0.8545, lr_0 = 8.0028e-04
Loss = 1.5471e-01, PNorm = 49.8093, GNorm = 2.4089, lr_0 = 7.9974e-04
Loss = 1.7104e-01, PNorm = 49.8447, GNorm = 1.4421, lr_0 = 7.9919e-04
Loss = 1.3239e-01, PNorm = 49.8699, GNorm = 0.6271, lr_0 = 7.9864e-04
Loss = 1.4241e-01, PNorm = 49.8873, GNorm = 0.8877, lr_0 = 7.9809e-04
Loss = 1.4234e-01, PNorm = 49.9015, GNorm = 1.4625, lr_0 = 7.9755e-04
Loss = 1.5379e-01, PNorm = 49.9220, GNorm = 1.7021, lr_0 = 7.9700e-04
Loss = 1.5841e-01, PNorm = 49.9419, GNorm = 0.8892, lr_0 = 7.9645e-04
Loss = 1.6614e-01, PNorm = 49.9524, GNorm = 0.8195, lr_0 = 7.9591e-04
Loss = 1.5326e-01, PNorm = 49.9665, GNorm = 0.6644, lr_0 = 7.9536e-04
Loss = 1.6297e-01, PNorm = 49.9814, GNorm = 0.9741, lr_0 = 7.9482e-04
Loss = 1.6566e-01, PNorm = 50.0090, GNorm = 0.7983, lr_0 = 7.9427e-04
Loss = 1.4567e-01, PNorm = 50.0306, GNorm = 0.9447, lr_0 = 7.9373e-04
Loss = 1.6125e-01, PNorm = 50.0472, GNorm = 0.9431, lr_0 = 7.9319e-04
Loss = 1.8388e-01, PNorm = 50.0606, GNorm = 3.1014, lr_0 = 7.9264e-04
Loss = 1.6039e-01, PNorm = 50.0866, GNorm = 0.7246, lr_0 = 7.9210e-04
Loss = 1.6067e-01, PNorm = 50.1172, GNorm = 1.0261, lr_0 = 7.9156e-04
Loss = 1.5599e-01, PNorm = 50.1435, GNorm = 1.1242, lr_0 = 7.9101e-04
Loss = 1.5813e-01, PNorm = 50.1696, GNorm = 0.6557, lr_0 = 7.9047e-04
Loss = 1.7476e-01, PNorm = 50.1879, GNorm = 1.3008, lr_0 = 7.8993e-04
Loss = 1.5652e-01, PNorm = 50.2009, GNorm = 1.6861, lr_0 = 7.8939e-04
Loss = 1.5224e-01, PNorm = 50.2222, GNorm = 1.9119, lr_0 = 7.8885e-04
Loss = 1.6058e-01, PNorm = 50.2448, GNorm = 0.9378, lr_0 = 7.8831e-04
Loss = 1.3490e-01, PNorm = 50.2655, GNorm = 1.1726, lr_0 = 7.8777e-04
Loss = 1.4393e-01, PNorm = 50.2863, GNorm = 0.7107, lr_0 = 7.8723e-04
Loss = 1.5873e-01, PNorm = 50.3073, GNorm = 0.9147, lr_0 = 7.8669e-04
Loss = 1.5705e-01, PNorm = 50.3204, GNorm = 0.5763, lr_0 = 7.8615e-04
Loss = 1.4752e-01, PNorm = 50.3409, GNorm = 1.0676, lr_0 = 7.8561e-04
Loss = 1.4022e-01, PNorm = 50.3607, GNorm = 0.6674, lr_0 = 7.8507e-04
Loss = 1.6296e-01, PNorm = 50.3781, GNorm = 1.5932, lr_0 = 7.8454e-04
Loss = 1.6434e-01, PNorm = 50.3977, GNorm = 0.8308, lr_0 = 7.8400e-04
Loss = 1.8960e-01, PNorm = 50.4225, GNorm = 2.6658, lr_0 = 7.8346e-04
Loss = 1.4603e-01, PNorm = 50.4460, GNorm = 0.7910, lr_0 = 7.8293e-04
Loss = 1.3002e-01, PNorm = 50.4645, GNorm = 0.7448, lr_0 = 7.8239e-04
Loss = 1.2990e-01, PNorm = 50.4861, GNorm = 0.9093, lr_0 = 7.8185e-04
Loss = 1.3547e-01, PNorm = 50.5062, GNorm = 0.7982, lr_0 = 7.8132e-04
Validation mae = 0.438481
Epoch 5
Loss = 1.5417e-01, PNorm = 50.5239, GNorm = 0.5317, lr_0 = 7.8078e-04
Loss = 1.2559e-01, PNorm = 50.5463, GNorm = 1.2831, lr_0 = 7.8025e-04
Loss = 1.5937e-01, PNorm = 50.5690, GNorm = 0.8817, lr_0 = 7.7971e-04
Loss = 1.2005e-01, PNorm = 50.5849, GNorm = 1.4435, lr_0 = 7.7918e-04
Loss = 1.3794e-01, PNorm = 50.6005, GNorm = 0.6809, lr_0 = 7.7864e-04
Loss = 1.2926e-01, PNorm = 50.6194, GNorm = 1.3231, lr_0 = 7.7811e-04
Loss = 1.2340e-01, PNorm = 50.6379, GNorm = 1.8681, lr_0 = 7.7758e-04
Loss = 1.4292e-01, PNorm = 50.6537, GNorm = 1.8042, lr_0 = 7.7705e-04
Loss = 1.2810e-01, PNorm = 50.6641, GNorm = 1.2947, lr_0 = 7.7651e-04
Loss = 1.1883e-01, PNorm = 50.6774, GNorm = 0.9833, lr_0 = 7.7598e-04
Loss = 1.3670e-01, PNorm = 50.6938, GNorm = 1.3993, lr_0 = 7.7545e-04
Loss = 1.3061e-01, PNorm = 50.7082, GNorm = 1.7129, lr_0 = 7.7492e-04
Loss = 1.4386e-01, PNorm = 50.7242, GNorm = 1.0216, lr_0 = 7.7439e-04
Loss = 1.2791e-01, PNorm = 50.7418, GNorm = 1.5296, lr_0 = 7.7386e-04
Loss = 1.3751e-01, PNorm = 50.7610, GNorm = 0.9779, lr_0 = 7.7333e-04
Loss = 1.2491e-01, PNorm = 50.7706, GNorm = 0.7142, lr_0 = 7.7280e-04
Loss = 1.2043e-01, PNorm = 50.7851, GNorm = 0.9862, lr_0 = 7.7227e-04
Loss = 1.2803e-01, PNorm = 50.7929, GNorm = 0.8672, lr_0 = 7.7174e-04
Loss = 1.4035e-01, PNorm = 50.8157, GNorm = 0.8142, lr_0 = 7.7121e-04
Loss = 1.2665e-01, PNorm = 50.8330, GNorm = 1.0289, lr_0 = 7.7068e-04
Loss = 1.5057e-01, PNorm = 50.8545, GNorm = 0.9133, lr_0 = 7.7015e-04
Loss = 1.6025e-01, PNorm = 50.8892, GNorm = 1.5549, lr_0 = 7.6963e-04
Loss = 1.5809e-01, PNorm = 50.9122, GNorm = 1.3971, lr_0 = 7.6910e-04
Loss = 1.5356e-01, PNorm = 50.9390, GNorm = 1.8722, lr_0 = 7.6857e-04
Loss = 1.4895e-01, PNorm = 50.9759, GNorm = 0.9974, lr_0 = 7.6805e-04
Loss = 1.5513e-01, PNorm = 50.9953, GNorm = 0.8361, lr_0 = 7.6752e-04
Loss = 1.3256e-01, PNorm = 51.0124, GNorm = 0.7678, lr_0 = 7.6699e-04
Loss = 1.3419e-01, PNorm = 51.0307, GNorm = 0.8396, lr_0 = 7.6647e-04
Loss = 1.4378e-01, PNorm = 51.0509, GNorm = 1.2808, lr_0 = 7.6594e-04
Loss = 1.4568e-01, PNorm = 51.0710, GNorm = 2.0629, lr_0 = 7.6542e-04
Loss = 1.3737e-01, PNorm = 51.0975, GNorm = 1.1675, lr_0 = 7.6489e-04
Loss = 1.6033e-01, PNorm = 51.1162, GNorm = 1.4772, lr_0 = 7.6437e-04
Loss = 1.2835e-01, PNorm = 51.1427, GNorm = 0.9594, lr_0 = 7.6385e-04
Loss = 1.5145e-01, PNorm = 51.1586, GNorm = 1.1755, lr_0 = 7.6332e-04
Loss = 1.3180e-01, PNorm = 51.1728, GNorm = 0.9801, lr_0 = 7.6280e-04
Loss = 1.3827e-01, PNorm = 51.1897, GNorm = 1.4902, lr_0 = 7.6228e-04
Loss = 1.4870e-01, PNorm = 51.2138, GNorm = 1.8074, lr_0 = 7.6176e-04
Loss = 1.3314e-01, PNorm = 51.2358, GNorm = 1.6944, lr_0 = 7.6123e-04
Loss = 1.2875e-01, PNorm = 51.2541, GNorm = 1.7233, lr_0 = 7.6071e-04
Loss = 1.4105e-01, PNorm = 51.2733, GNorm = 2.4703, lr_0 = 7.6019e-04
Loss = 1.3495e-01, PNorm = 51.3057, GNorm = 0.7611, lr_0 = 7.5967e-04
Loss = 1.3948e-01, PNorm = 51.3236, GNorm = 0.6951, lr_0 = 7.5915e-04
Loss = 1.4353e-01, PNorm = 51.3417, GNorm = 1.2271, lr_0 = 7.5863e-04
Loss = 1.3588e-01, PNorm = 51.3556, GNorm = 0.6641, lr_0 = 7.5811e-04
Loss = 1.2514e-01, PNorm = 51.3786, GNorm = 0.7077, lr_0 = 7.5759e-04
Loss = 1.4984e-01, PNorm = 51.3911, GNorm = 0.9303, lr_0 = 7.5707e-04
Loss = 1.3721e-01, PNorm = 51.4078, GNorm = 0.6121, lr_0 = 7.5655e-04
Loss = 1.2152e-01, PNorm = 51.4234, GNorm = 0.5796, lr_0 = 7.5603e-04
Loss = 1.3480e-01, PNorm = 51.4437, GNorm = 1.2191, lr_0 = 7.5552e-04
Loss = 1.3239e-01, PNorm = 51.4601, GNorm = 1.3129, lr_0 = 7.5500e-04
Loss = 1.3393e-01, PNorm = 51.4874, GNorm = 0.8711, lr_0 = 7.5448e-04
Loss = 1.3265e-01, PNorm = 51.5087, GNorm = 1.9849, lr_0 = 7.5397e-04
Loss = 1.4134e-01, PNorm = 51.5315, GNorm = 0.7374, lr_0 = 7.5345e-04
Loss = 1.3630e-01, PNorm = 51.5557, GNorm = 0.6539, lr_0 = 7.5293e-04
Loss = 1.4197e-01, PNorm = 51.5673, GNorm = 0.9092, lr_0 = 7.5242e-04
Loss = 1.3523e-01, PNorm = 51.5897, GNorm = 0.8246, lr_0 = 7.5190e-04
Loss = 1.2699e-01, PNorm = 51.6119, GNorm = 0.6922, lr_0 = 7.5139e-04
Loss = 1.3905e-01, PNorm = 51.6233, GNorm = 1.6801, lr_0 = 7.5087e-04
Loss = 1.2824e-01, PNorm = 51.6389, GNorm = 0.6691, lr_0 = 7.5036e-04
Loss = 1.5724e-01, PNorm = 51.6534, GNorm = 0.7466, lr_0 = 7.4984e-04
Loss = 1.3023e-01, PNorm = 51.6720, GNorm = 1.0994, lr_0 = 7.4933e-04
Loss = 1.2922e-01, PNorm = 51.6946, GNorm = 1.3510, lr_0 = 7.4882e-04
Loss = 1.2564e-01, PNorm = 51.7184, GNorm = 0.6417, lr_0 = 7.4830e-04
Loss = 1.2180e-01, PNorm = 51.7309, GNorm = 0.7849, lr_0 = 7.4779e-04
Loss = 1.2799e-01, PNorm = 51.7416, GNorm = 0.9458, lr_0 = 7.4728e-04
Loss = 1.5264e-01, PNorm = 51.7597, GNorm = 1.5795, lr_0 = 7.4677e-04
Loss = 1.4110e-01, PNorm = 51.7742, GNorm = 1.2591, lr_0 = 7.4625e-04
Loss = 1.3279e-01, PNorm = 51.7910, GNorm = 0.8554, lr_0 = 7.4574e-04
Loss = 1.6908e-01, PNorm = 51.8090, GNorm = 0.9781, lr_0 = 7.4523e-04
Loss = 1.7422e-01, PNorm = 51.8318, GNorm = 3.8723, lr_0 = 7.4472e-04
Loss = 1.5381e-01, PNorm = 51.8583, GNorm = 0.6938, lr_0 = 7.4421e-04
Loss = 1.5042e-01, PNorm = 51.8887, GNorm = 0.8292, lr_0 = 7.4370e-04
Loss = 1.4946e-01, PNorm = 51.9166, GNorm = 1.2674, lr_0 = 7.4319e-04
Loss = 1.5554e-01, PNorm = 51.9347, GNorm = 1.4379, lr_0 = 7.4268e-04
Loss = 1.2425e-01, PNorm = 51.9561, GNorm = 0.7305, lr_0 = 7.4217e-04
Loss = 1.4702e-01, PNorm = 51.9715, GNorm = 0.8848, lr_0 = 7.4167e-04
Loss = 1.4034e-01, PNorm = 51.9908, GNorm = 0.7202, lr_0 = 7.4116e-04
Loss = 1.4588e-01, PNorm = 52.0052, GNorm = 1.0102, lr_0 = 7.4065e-04
Loss = 1.2929e-01, PNorm = 52.0230, GNorm = 0.8628, lr_0 = 7.4014e-04
Loss = 1.4073e-01, PNorm = 52.0429, GNorm = 0.7934, lr_0 = 7.3964e-04
Loss = 1.5286e-01, PNorm = 52.0744, GNorm = 2.1237, lr_0 = 7.3913e-04
Loss = 1.3792e-01, PNorm = 52.0992, GNorm = 0.6885, lr_0 = 7.3862e-04
Loss = 1.3329e-01, PNorm = 52.1158, GNorm = 0.8583, lr_0 = 7.3812e-04
Loss = 1.4286e-01, PNorm = 52.1360, GNorm = 0.7033, lr_0 = 7.3761e-04
Loss = 1.3124e-01, PNorm = 52.1515, GNorm = 0.6143, lr_0 = 7.3711e-04
Loss = 1.4678e-01, PNorm = 52.1631, GNorm = 0.8925, lr_0 = 7.3660e-04
Loss = 1.3663e-01, PNorm = 52.1767, GNorm = 1.6269, lr_0 = 7.3610e-04
Loss = 1.3227e-01, PNorm = 52.1966, GNorm = 0.7236, lr_0 = 7.3559e-04
Loss = 1.3936e-01, PNorm = 52.2197, GNorm = 0.9221, lr_0 = 7.3509e-04
Loss = 1.4018e-01, PNorm = 52.2507, GNorm = 1.2898, lr_0 = 7.3458e-04
Loss = 1.2788e-01, PNorm = 52.2672, GNorm = 0.8095, lr_0 = 7.3408e-04
Loss = 1.4475e-01, PNorm = 52.2813, GNorm = 1.3792, lr_0 = 7.3358e-04
Loss = 1.4325e-01, PNorm = 52.2960, GNorm = 0.9282, lr_0 = 7.3308e-04
Loss = 1.2522e-01, PNorm = 52.3135, GNorm = 0.7439, lr_0 = 7.3257e-04
Loss = 1.2967e-01, PNorm = 52.3236, GNorm = 0.9542, lr_0 = 7.3207e-04
Loss = 1.5011e-01, PNorm = 52.3409, GNorm = 0.7193, lr_0 = 7.3157e-04
Loss = 1.4173e-01, PNorm = 52.3579, GNorm = 0.8562, lr_0 = 7.3107e-04
Loss = 1.2831e-01, PNorm = 52.3752, GNorm = 1.2803, lr_0 = 7.3057e-04
Loss = 1.5490e-01, PNorm = 52.3856, GNorm = 1.8390, lr_0 = 7.3007e-04
Loss = 1.5413e-01, PNorm = 52.4051, GNorm = 0.5702, lr_0 = 7.2957e-04
Loss = 1.6205e-01, PNorm = 52.4324, GNorm = 1.8720, lr_0 = 7.2907e-04
Loss = 1.3527e-01, PNorm = 52.4579, GNorm = 0.7714, lr_0 = 7.2857e-04
Loss = 1.2917e-01, PNorm = 52.4719, GNorm = 0.7533, lr_0 = 7.2807e-04
Loss = 1.3619e-01, PNorm = 52.4857, GNorm = 0.5157, lr_0 = 7.2757e-04
Loss = 1.2822e-01, PNorm = 52.5000, GNorm = 0.4986, lr_0 = 7.2707e-04
Loss = 1.2556e-01, PNorm = 52.5140, GNorm = 0.9091, lr_0 = 7.2657e-04
Loss = 1.4570e-01, PNorm = 52.5331, GNorm = 0.8985, lr_0 = 7.2608e-04
Loss = 1.5258e-01, PNorm = 52.5493, GNorm = 1.7098, lr_0 = 7.2558e-04
Loss = 1.2585e-01, PNorm = 52.5664, GNorm = 1.0113, lr_0 = 7.2508e-04
Loss = 1.3449e-01, PNorm = 52.5868, GNorm = 1.0894, lr_0 = 7.2458e-04
Loss = 1.1932e-01, PNorm = 52.6097, GNorm = 1.1320, lr_0 = 7.2409e-04
Loss = 1.3840e-01, PNorm = 52.6322, GNorm = 0.7761, lr_0 = 7.2359e-04
Loss = 1.3524e-01, PNorm = 52.6485, GNorm = 2.0057, lr_0 = 7.2310e-04
Loss = 1.3807e-01, PNorm = 52.6576, GNorm = 1.3760, lr_0 = 7.2260e-04
Loss = 1.4307e-01, PNorm = 52.6698, GNorm = 2.3536, lr_0 = 7.2211e-04
Loss = 1.5964e-01, PNorm = 52.6939, GNorm = 0.7238, lr_0 = 7.2161e-04
Loss = 1.6502e-01, PNorm = 52.7242, GNorm = 0.7430, lr_0 = 7.2112e-04
Loss = 1.5338e-01, PNorm = 52.7514, GNorm = 2.3144, lr_0 = 7.2062e-04
Loss = 1.5564e-01, PNorm = 52.7689, GNorm = 1.0400, lr_0 = 7.2013e-04
Loss = 1.2491e-01, PNorm = 52.7923, GNorm = 1.7308, lr_0 = 7.1964e-04
Validation mae = 0.456414
Epoch 6
Loss = 1.2019e-01, PNorm = 52.8149, GNorm = 0.9491, lr_0 = 7.1914e-04
Loss = 1.3792e-01, PNorm = 52.8374, GNorm = 0.6537, lr_0 = 7.1865e-04
Loss = 1.4409e-01, PNorm = 52.8628, GNorm = 0.7039, lr_0 = 7.1816e-04
Loss = 1.2610e-01, PNorm = 52.8832, GNorm = 0.8951, lr_0 = 7.1767e-04
Loss = 1.3069e-01, PNorm = 52.9045, GNorm = 1.6442, lr_0 = 7.1717e-04
Loss = 1.2774e-01, PNorm = 52.9319, GNorm = 0.6517, lr_0 = 7.1668e-04
Loss = 1.1101e-01, PNorm = 52.9530, GNorm = 1.1255, lr_0 = 7.1619e-04
Loss = 1.2909e-01, PNorm = 52.9698, GNorm = 0.6388, lr_0 = 7.1570e-04
Loss = 1.1603e-01, PNorm = 52.9872, GNorm = 0.6803, lr_0 = 7.1521e-04
Loss = 1.3198e-01, PNorm = 53.0037, GNorm = 0.5262, lr_0 = 7.1472e-04
Loss = 1.1397e-01, PNorm = 53.0165, GNorm = 0.4972, lr_0 = 7.1423e-04
Loss = 1.2937e-01, PNorm = 53.0356, GNorm = 0.6633, lr_0 = 7.1374e-04
Loss = 1.1490e-01, PNorm = 53.0522, GNorm = 0.5280, lr_0 = 7.1325e-04
Loss = 1.0991e-01, PNorm = 53.0652, GNorm = 0.6446, lr_0 = 7.1277e-04
Loss = 1.2417e-01, PNorm = 53.0841, GNorm = 0.6977, lr_0 = 7.1228e-04
Loss = 1.4809e-01, PNorm = 53.0975, GNorm = 1.6875, lr_0 = 7.1179e-04
Loss = 1.2035e-01, PNorm = 53.1125, GNorm = 1.3357, lr_0 = 7.1130e-04
Loss = 1.2211e-01, PNorm = 53.1361, GNorm = 1.5674, lr_0 = 7.1081e-04
Loss = 1.3877e-01, PNorm = 53.1563, GNorm = 0.8591, lr_0 = 7.1033e-04
Loss = 1.1823e-01, PNorm = 53.1748, GNorm = 0.8589, lr_0 = 7.0984e-04
Loss = 1.3842e-01, PNorm = 53.2003, GNorm = 0.8937, lr_0 = 7.0935e-04
Loss = 1.3439e-01, PNorm = 53.2162, GNorm = 0.5682, lr_0 = 7.0887e-04
Loss = 1.3167e-01, PNorm = 53.2336, GNorm = 1.2927, lr_0 = 7.0838e-04
Loss = 1.2611e-01, PNorm = 53.2529, GNorm = 1.0091, lr_0 = 7.0790e-04
Loss = 1.3513e-01, PNorm = 53.2676, GNorm = 1.1887, lr_0 = 7.0741e-04
Loss = 1.0812e-01, PNorm = 53.2822, GNorm = 1.4000, lr_0 = 7.0693e-04
Loss = 1.0960e-01, PNorm = 53.3013, GNorm = 0.6254, lr_0 = 7.0644e-04
Loss = 1.3198e-01, PNorm = 53.3173, GNorm = 0.6676, lr_0 = 7.0596e-04
Loss = 1.0630e-01, PNorm = 53.3283, GNorm = 0.4305, lr_0 = 7.0548e-04
Loss = 1.4660e-01, PNorm = 53.3359, GNorm = 1.1803, lr_0 = 7.0499e-04
Loss = 1.2467e-01, PNorm = 53.3513, GNorm = 1.3213, lr_0 = 7.0451e-04
Loss = 1.4287e-01, PNorm = 53.3775, GNorm = 0.7497, lr_0 = 7.0403e-04
Loss = 1.4265e-01, PNorm = 53.4030, GNorm = 1.0378, lr_0 = 7.0354e-04
Loss = 1.5506e-01, PNorm = 53.4279, GNorm = 2.0243, lr_0 = 7.0306e-04
Loss = 1.4815e-01, PNorm = 53.4546, GNorm = 0.8032, lr_0 = 7.0258e-04
Loss = 1.0480e-01, PNorm = 53.4773, GNorm = 0.6670, lr_0 = 7.0210e-04
Loss = 1.1809e-01, PNorm = 53.4912, GNorm = 0.9243, lr_0 = 7.0162e-04
Loss = 1.2724e-01, PNorm = 53.5080, GNorm = 0.9924, lr_0 = 7.0114e-04
Loss = 1.0960e-01, PNorm = 53.5218, GNorm = 0.8558, lr_0 = 7.0066e-04
Loss = 1.2501e-01, PNorm = 53.5406, GNorm = 1.3017, lr_0 = 7.0018e-04
Loss = 1.0231e-01, PNorm = 53.5519, GNorm = 0.6493, lr_0 = 6.9970e-04
Loss = 1.1874e-01, PNorm = 53.5604, GNorm = 0.9021, lr_0 = 6.9922e-04
Loss = 1.2912e-01, PNorm = 53.5789, GNorm = 0.9274, lr_0 = 6.9874e-04
Loss = 1.2872e-01, PNorm = 53.5967, GNorm = 0.7939, lr_0 = 6.9826e-04
Loss = 1.1216e-01, PNorm = 53.6165, GNorm = 1.3722, lr_0 = 6.9778e-04
Loss = 1.5290e-01, PNorm = 53.6333, GNorm = 0.6062, lr_0 = 6.9730e-04
Loss = 1.2039e-01, PNorm = 53.6447, GNorm = 1.6164, lr_0 = 6.9683e-04
Loss = 1.3889e-01, PNorm = 53.6583, GNorm = 1.7488, lr_0 = 6.9635e-04
Loss = 1.1964e-01, PNorm = 53.6710, GNorm = 1.5217, lr_0 = 6.9587e-04
Loss = 1.2047e-01, PNorm = 53.6909, GNorm = 1.3091, lr_0 = 6.9540e-04
Loss = 1.1144e-01, PNorm = 53.7060, GNorm = 1.7519, lr_0 = 6.9492e-04
Loss = 1.4444e-01, PNorm = 53.7171, GNorm = 0.5211, lr_0 = 6.9444e-04
Loss = 1.5735e-01, PNorm = 53.7333, GNorm = 2.7881, lr_0 = 6.9397e-04
Loss = 1.4704e-01, PNorm = 53.7524, GNorm = 0.5588, lr_0 = 6.9349e-04
Loss = 1.2420e-01, PNorm = 53.7710, GNorm = 0.6666, lr_0 = 6.9302e-04
Loss = 1.1946e-01, PNorm = 53.7853, GNorm = 2.2051, lr_0 = 6.9254e-04
Loss = 1.2668e-01, PNorm = 53.8014, GNorm = 1.0879, lr_0 = 6.9207e-04
Loss = 1.2464e-01, PNorm = 53.8152, GNorm = 2.0178, lr_0 = 6.9159e-04
Loss = 1.4053e-01, PNorm = 53.8317, GNorm = 1.5740, lr_0 = 6.9112e-04
Loss = 1.2419e-01, PNorm = 53.8465, GNorm = 0.5843, lr_0 = 6.9065e-04
Loss = 1.2741e-01, PNorm = 53.8581, GNorm = 0.6331, lr_0 = 6.9017e-04
Loss = 1.4193e-01, PNorm = 53.8743, GNorm = 0.7585, lr_0 = 6.8970e-04
Loss = 1.2709e-01, PNorm = 53.8894, GNorm = 1.5521, lr_0 = 6.8923e-04
Loss = 1.2829e-01, PNorm = 53.9126, GNorm = 1.5266, lr_0 = 6.8876e-04
Loss = 1.3802e-01, PNorm = 53.9375, GNorm = 1.3694, lr_0 = 6.8828e-04
Loss = 1.3533e-01, PNorm = 53.9570, GNorm = 1.0237, lr_0 = 6.8781e-04
Loss = 1.3389e-01, PNorm = 53.9724, GNorm = 2.2758, lr_0 = 6.8734e-04
Loss = 1.4880e-01, PNorm = 53.9882, GNorm = 0.8909, lr_0 = 6.8687e-04
Loss = 1.1561e-01, PNorm = 53.9952, GNorm = 0.7582, lr_0 = 6.8640e-04
Loss = 1.3638e-01, PNorm = 54.0115, GNorm = 0.8176, lr_0 = 6.8593e-04
Loss = 1.2969e-01, PNorm = 54.0303, GNorm = 0.5819, lr_0 = 6.8546e-04
Loss = 1.2237e-01, PNorm = 54.0464, GNorm = 0.6619, lr_0 = 6.8499e-04
Loss = 1.3030e-01, PNorm = 54.0677, GNorm = 0.8286, lr_0 = 6.8452e-04
Loss = 1.1441e-01, PNorm = 54.0849, GNorm = 0.7268, lr_0 = 6.8405e-04
Loss = 1.4958e-01, PNorm = 54.1042, GNorm = 1.1282, lr_0 = 6.8358e-04
Loss = 1.3024e-01, PNorm = 54.1235, GNorm = 1.4355, lr_0 = 6.8312e-04
Loss = 1.2131e-01, PNorm = 54.1399, GNorm = 0.8129, lr_0 = 6.8265e-04
Loss = 1.2490e-01, PNorm = 54.1505, GNorm = 1.3521, lr_0 = 6.8218e-04
Loss = 1.2199e-01, PNorm = 54.1615, GNorm = 1.1916, lr_0 = 6.8171e-04
Loss = 1.4593e-01, PNorm = 54.1773, GNorm = 0.8100, lr_0 = 6.8125e-04
Loss = 1.1896e-01, PNorm = 54.1969, GNorm = 2.0672, lr_0 = 6.8078e-04
Loss = 1.2594e-01, PNorm = 54.2130, GNorm = 1.5405, lr_0 = 6.8031e-04
Loss = 1.3113e-01, PNorm = 54.2357, GNorm = 0.8449, lr_0 = 6.7985e-04
Loss = 1.2935e-01, PNorm = 54.2531, GNorm = 1.9552, lr_0 = 6.7938e-04
Loss = 1.4468e-01, PNorm = 54.2748, GNorm = 1.8471, lr_0 = 6.7892e-04
Loss = 1.3798e-01, PNorm = 54.2979, GNorm = 0.6945, lr_0 = 6.7845e-04
Loss = 1.3392e-01, PNorm = 54.3189, GNorm = 1.2028, lr_0 = 6.7799e-04
Loss = 1.2505e-01, PNorm = 54.3396, GNorm = 0.4928, lr_0 = 6.7752e-04
Loss = 1.1524e-01, PNorm = 54.3568, GNorm = 1.3712, lr_0 = 6.7706e-04
Loss = 1.2150e-01, PNorm = 54.3654, GNorm = 0.6937, lr_0 = 6.7659e-04
Loss = 1.2808e-01, PNorm = 54.3823, GNorm = 1.2515, lr_0 = 6.7613e-04
Loss = 1.1830e-01, PNorm = 54.3953, GNorm = 0.7952, lr_0 = 6.7567e-04
Loss = 1.4430e-01, PNorm = 54.4123, GNorm = 0.5570, lr_0 = 6.7520e-04
Loss = 1.0237e-01, PNorm = 54.4269, GNorm = 0.4329, lr_0 = 6.7474e-04
Loss = 1.2246e-01, PNorm = 54.4380, GNorm = 0.6643, lr_0 = 6.7428e-04
Loss = 1.3006e-01, PNorm = 54.4544, GNorm = 1.1793, lr_0 = 6.7382e-04
Loss = 1.3450e-01, PNorm = 54.4720, GNorm = 0.7273, lr_0 = 6.7335e-04
Loss = 1.2460e-01, PNorm = 54.4796, GNorm = 0.6434, lr_0 = 6.7289e-04
Loss = 1.2541e-01, PNorm = 54.4992, GNorm = 1.1661, lr_0 = 6.7243e-04
Loss = 1.1782e-01, PNorm = 54.5209, GNorm = 0.5296, lr_0 = 6.7197e-04
Loss = 1.2181e-01, PNorm = 54.5393, GNorm = 0.8458, lr_0 = 6.7151e-04
Loss = 1.3000e-01, PNorm = 54.5513, GNorm = 1.3555, lr_0 = 6.7105e-04
Loss = 1.3086e-01, PNorm = 54.5597, GNorm = 0.9170, lr_0 = 6.7059e-04
Loss = 1.4887e-01, PNorm = 54.5746, GNorm = 0.9353, lr_0 = 6.7013e-04
Loss = 1.3998e-01, PNorm = 54.5942, GNorm = 0.7783, lr_0 = 6.6967e-04
Loss = 1.3054e-01, PNorm = 54.6171, GNorm = 1.0106, lr_0 = 6.6921e-04
Loss = 1.0969e-01, PNorm = 54.6366, GNorm = 0.6175, lr_0 = 6.6876e-04
Loss = 1.3281e-01, PNorm = 54.6481, GNorm = 1.1060, lr_0 = 6.6830e-04
Loss = 1.2568e-01, PNorm = 54.6618, GNorm = 0.6427, lr_0 = 6.6784e-04
Loss = 1.1525e-01, PNorm = 54.6739, GNorm = 0.7221, lr_0 = 6.6738e-04
Loss = 1.3362e-01, PNorm = 54.6861, GNorm = 0.9774, lr_0 = 6.6693e-04
Loss = 1.3733e-01, PNorm = 54.7059, GNorm = 0.5884, lr_0 = 6.6647e-04
Loss = 1.2841e-01, PNorm = 54.7253, GNorm = 1.4375, lr_0 = 6.6601e-04
Loss = 1.3932e-01, PNorm = 54.7341, GNorm = 0.7349, lr_0 = 6.6556e-04
Loss = 1.2679e-01, PNorm = 54.7448, GNorm = 0.8965, lr_0 = 6.6510e-04
Loss = 1.3541e-01, PNorm = 54.7550, GNorm = 0.5644, lr_0 = 6.6464e-04
Loss = 1.4610e-01, PNorm = 54.7691, GNorm = 1.2469, lr_0 = 6.6419e-04
Loss = 1.3126e-01, PNorm = 54.7913, GNorm = 0.6091, lr_0 = 6.6373e-04
Loss = 1.1985e-01, PNorm = 54.8152, GNorm = 0.8188, lr_0 = 6.6328e-04
Loss = 1.1055e-01, PNorm = 54.8339, GNorm = 0.7686, lr_0 = 6.6282e-04
Validation mae = 0.431254
Epoch 7
Loss = 1.1609e-01, PNorm = 54.8419, GNorm = 0.9339, lr_0 = 6.6237e-04
Loss = 1.1203e-01, PNorm = 54.8563, GNorm = 1.5200, lr_0 = 6.6192e-04
Loss = 1.1692e-01, PNorm = 54.8768, GNorm = 1.4449, lr_0 = 6.6146e-04
Loss = 1.1451e-01, PNorm = 54.8899, GNorm = 0.9104, lr_0 = 6.6101e-04
Loss = 1.1889e-01, PNorm = 54.9011, GNorm = 0.7958, lr_0 = 6.6056e-04
Loss = 1.0433e-01, PNorm = 54.9136, GNorm = 0.6179, lr_0 = 6.6011e-04
Loss = 1.2941e-01, PNorm = 54.9342, GNorm = 1.8024, lr_0 = 6.5965e-04
Loss = 1.2661e-01, PNorm = 54.9529, GNorm = 1.0180, lr_0 = 6.5920e-04
Loss = 1.0863e-01, PNorm = 54.9666, GNorm = 0.6665, lr_0 = 6.5875e-04
Loss = 1.2858e-01, PNorm = 54.9859, GNorm = 1.2257, lr_0 = 6.5830e-04
Loss = 1.2624e-01, PNorm = 55.0118, GNorm = 0.5159, lr_0 = 6.5785e-04
Loss = 1.1716e-01, PNorm = 55.0282, GNorm = 0.5156, lr_0 = 6.5740e-04
Loss = 9.4064e-02, PNorm = 55.0480, GNorm = 0.4666, lr_0 = 6.5695e-04
Loss = 1.2827e-01, PNorm = 55.0679, GNorm = 2.0099, lr_0 = 6.5650e-04
Loss = 1.3001e-01, PNorm = 55.0948, GNorm = 0.6157, lr_0 = 6.5605e-04
Loss = 1.3178e-01, PNorm = 55.1172, GNorm = 2.8393, lr_0 = 6.5560e-04
Loss = 1.2427e-01, PNorm = 55.1336, GNorm = 1.7968, lr_0 = 6.5515e-04
Loss = 1.1555e-01, PNorm = 55.1560, GNorm = 0.7996, lr_0 = 6.5470e-04
Loss = 1.3771e-01, PNorm = 55.1743, GNorm = 1.2225, lr_0 = 6.5425e-04
Loss = 1.2147e-01, PNorm = 55.1967, GNorm = 1.3529, lr_0 = 6.5380e-04
Loss = 1.1796e-01, PNorm = 55.2187, GNorm = 0.5062, lr_0 = 6.5335e-04
Loss = 1.0363e-01, PNorm = 55.2355, GNorm = 1.1245, lr_0 = 6.5291e-04
Loss = 1.1461e-01, PNorm = 55.2514, GNorm = 0.7991, lr_0 = 6.5246e-04
Loss = 1.0793e-01, PNorm = 55.2675, GNorm = 0.5951, lr_0 = 6.5201e-04
Loss = 1.3447e-01, PNorm = 55.2833, GNorm = 0.9417, lr_0 = 6.5157e-04
Loss = 1.2694e-01, PNorm = 55.3042, GNorm = 0.8095, lr_0 = 6.5112e-04
Loss = 1.1400e-01, PNorm = 55.3197, GNorm = 0.8571, lr_0 = 6.5067e-04
Loss = 1.2635e-01, PNorm = 55.3378, GNorm = 0.8557, lr_0 = 6.5023e-04
Loss = 1.1690e-01, PNorm = 55.3582, GNorm = 0.6288, lr_0 = 6.4978e-04
Loss = 1.3976e-01, PNorm = 55.3793, GNorm = 0.5184, lr_0 = 6.4934e-04
Loss = 1.2428e-01, PNorm = 55.4012, GNorm = 1.6459, lr_0 = 6.4889e-04
Loss = 1.3351e-01, PNorm = 55.4204, GNorm = 0.8694, lr_0 = 6.4845e-04
Loss = 1.3019e-01, PNorm = 55.4405, GNorm = 0.7039, lr_0 = 6.4800e-04
Loss = 1.3709e-01, PNorm = 55.4600, GNorm = 1.8706, lr_0 = 6.4756e-04
Loss = 1.3902e-01, PNorm = 55.4784, GNorm = 1.2220, lr_0 = 6.4712e-04
Loss = 1.2444e-01, PNorm = 55.4985, GNorm = 0.7929, lr_0 = 6.4667e-04
Loss = 1.0159e-01, PNorm = 55.5204, GNorm = 1.0714, lr_0 = 6.4623e-04
Loss = 1.2128e-01, PNorm = 55.5355, GNorm = 0.7663, lr_0 = 6.4579e-04
Loss = 1.2438e-01, PNorm = 55.5460, GNorm = 0.9698, lr_0 = 6.4534e-04
Loss = 1.2926e-01, PNorm = 55.5599, GNorm = 1.8690, lr_0 = 6.4490e-04
Loss = 1.3302e-01, PNorm = 55.5827, GNorm = 0.7386, lr_0 = 6.4446e-04
Loss = 1.3663e-01, PNorm = 55.6071, GNorm = 0.6855, lr_0 = 6.4402e-04
Loss = 1.1951e-01, PNorm = 55.6234, GNorm = 0.7643, lr_0 = 6.4358e-04
Loss = 1.2591e-01, PNorm = 55.6337, GNorm = 0.6878, lr_0 = 6.4314e-04
Loss = 1.1877e-01, PNorm = 55.6506, GNorm = 1.0570, lr_0 = 6.4270e-04
Loss = 1.0564e-01, PNorm = 55.6688, GNorm = 0.5984, lr_0 = 6.4226e-04
Loss = 1.0856e-01, PNorm = 55.6878, GNorm = 0.4713, lr_0 = 6.4182e-04
Loss = 1.2514e-01, PNorm = 55.7046, GNorm = 0.9471, lr_0 = 6.4138e-04
Loss = 1.2724e-01, PNorm = 55.7175, GNorm = 0.7470, lr_0 = 6.4094e-04
Loss = 1.2057e-01, PNorm = 55.7376, GNorm = 0.7619, lr_0 = 6.4050e-04
Loss = 1.1718e-01, PNorm = 55.7543, GNorm = 1.0141, lr_0 = 6.4006e-04
Loss = 1.0756e-01, PNorm = 55.7716, GNorm = 0.6082, lr_0 = 6.3962e-04
Loss = 1.3213e-01, PNorm = 55.7854, GNorm = 1.6028, lr_0 = 6.3918e-04
Loss = 1.0862e-01, PNorm = 55.8007, GNorm = 0.6043, lr_0 = 6.3874e-04
Loss = 1.2570e-01, PNorm = 55.8139, GNorm = 0.8604, lr_0 = 6.3831e-04
Loss = 1.1893e-01, PNorm = 55.8240, GNorm = 0.5619, lr_0 = 6.3787e-04
Loss = 1.3314e-01, PNorm = 55.8423, GNorm = 2.2697, lr_0 = 6.3743e-04
Loss = 1.2583e-01, PNorm = 55.8597, GNorm = 0.5897, lr_0 = 6.3700e-04
Loss = 1.0765e-01, PNorm = 55.8742, GNorm = 1.0350, lr_0 = 6.3656e-04
Loss = 1.1308e-01, PNorm = 55.8943, GNorm = 1.1539, lr_0 = 6.3612e-04
Loss = 1.0148e-01, PNorm = 55.9137, GNorm = 0.6627, lr_0 = 6.3569e-04
Loss = 1.0781e-01, PNorm = 55.9258, GNorm = 0.6008, lr_0 = 6.3525e-04
Loss = 1.3107e-01, PNorm = 55.9394, GNorm = 0.6928, lr_0 = 6.3482e-04
Loss = 1.0777e-01, PNorm = 55.9566, GNorm = 0.5770, lr_0 = 6.3438e-04
Loss = 1.1214e-01, PNorm = 55.9762, GNorm = 0.7582, lr_0 = 6.3395e-04
Loss = 1.1152e-01, PNorm = 55.9947, GNorm = 0.5444, lr_0 = 6.3351e-04
Loss = 1.2018e-01, PNorm = 56.0075, GNorm = 0.9166, lr_0 = 6.3308e-04
Loss = 1.2539e-01, PNorm = 56.0227, GNorm = 0.9270, lr_0 = 6.3265e-04
Loss = 1.1679e-01, PNorm = 56.0415, GNorm = 0.9640, lr_0 = 6.3221e-04
Loss = 1.2248e-01, PNorm = 56.0610, GNorm = 0.8760, lr_0 = 6.3178e-04
Loss = 1.1748e-01, PNorm = 56.0845, GNorm = 0.6321, lr_0 = 6.3135e-04
Loss = 1.2189e-01, PNorm = 56.0964, GNorm = 0.8038, lr_0 = 6.3091e-04
Loss = 1.1577e-01, PNorm = 56.1113, GNorm = 1.1906, lr_0 = 6.3048e-04
Loss = 1.1433e-01, PNorm = 56.1215, GNorm = 0.6323, lr_0 = 6.3005e-04
Loss = 1.1365e-01, PNorm = 56.1376, GNorm = 0.7006, lr_0 = 6.2962e-04
Loss = 1.2205e-01, PNorm = 56.1479, GNorm = 0.6649, lr_0 = 6.2919e-04
Loss = 1.1540e-01, PNorm = 56.1574, GNorm = 0.6393, lr_0 = 6.2876e-04
Loss = 1.2650e-01, PNorm = 56.1730, GNorm = 0.6355, lr_0 = 6.2833e-04
Loss = 1.1686e-01, PNorm = 56.1858, GNorm = 0.6787, lr_0 = 6.2789e-04
Loss = 1.1884e-01, PNorm = 56.2006, GNorm = 0.6191, lr_0 = 6.2746e-04
Loss = 1.2140e-01, PNorm = 56.2181, GNorm = 0.9419, lr_0 = 6.2703e-04
Loss = 1.1120e-01, PNorm = 56.2407, GNorm = 0.8366, lr_0 = 6.2661e-04
Loss = 1.2128e-01, PNorm = 56.2618, GNorm = 1.0963, lr_0 = 6.2618e-04
Loss = 1.0693e-01, PNorm = 56.2817, GNorm = 0.7134, lr_0 = 6.2575e-04
Loss = 1.1751e-01, PNorm = 56.2970, GNorm = 0.8460, lr_0 = 6.2532e-04
Loss = 1.1823e-01, PNorm = 56.3105, GNorm = 1.1476, lr_0 = 6.2489e-04
Loss = 1.1910e-01, PNorm = 56.3188, GNorm = 0.5930, lr_0 = 6.2446e-04
Loss = 1.1948e-01, PNorm = 56.3327, GNorm = 0.5745, lr_0 = 6.2403e-04
Loss = 1.2047e-01, PNorm = 56.3430, GNorm = 0.6751, lr_0 = 6.2361e-04
Loss = 1.2901e-01, PNorm = 56.3538, GNorm = 0.8943, lr_0 = 6.2318e-04
Loss = 1.3451e-01, PNorm = 56.3643, GNorm = 0.6084, lr_0 = 6.2275e-04
Loss = 1.4105e-01, PNorm = 56.3820, GNorm = 0.4145, lr_0 = 6.2233e-04
Loss = 1.2566e-01, PNorm = 56.4045, GNorm = 0.8054, lr_0 = 6.2190e-04
Loss = 1.2405e-01, PNorm = 56.4197, GNorm = 0.6518, lr_0 = 6.2147e-04
Loss = 1.3471e-01, PNorm = 56.4390, GNorm = 1.2355, lr_0 = 6.2105e-04
Loss = 1.2623e-01, PNorm = 56.4528, GNorm = 0.9892, lr_0 = 6.2062e-04
Loss = 1.4498e-01, PNorm = 56.4670, GNorm = 1.1007, lr_0 = 6.2020e-04
Loss = 1.2131e-01, PNorm = 56.4833, GNorm = 0.8635, lr_0 = 6.1977e-04
Loss = 1.3800e-01, PNorm = 56.5024, GNorm = 0.9004, lr_0 = 6.1935e-04
Loss = 1.2002e-01, PNorm = 56.5221, GNorm = 0.8517, lr_0 = 6.1892e-04
Loss = 1.1324e-01, PNorm = 56.5451, GNorm = 1.4082, lr_0 = 6.1850e-04
Loss = 1.2767e-01, PNorm = 56.5649, GNorm = 0.8449, lr_0 = 6.1808e-04
Loss = 1.1569e-01, PNorm = 56.5845, GNorm = 0.6941, lr_0 = 6.1765e-04
Loss = 1.0376e-01, PNorm = 56.5927, GNorm = 1.4262, lr_0 = 6.1723e-04
Loss = 1.0300e-01, PNorm = 56.5996, GNorm = 0.6042, lr_0 = 6.1681e-04
Loss = 1.3956e-01, PNorm = 56.6095, GNorm = 0.9472, lr_0 = 6.1638e-04
Loss = 1.2660e-01, PNorm = 56.6252, GNorm = 0.6384, lr_0 = 6.1596e-04
Loss = 1.2533e-01, PNorm = 56.6458, GNorm = 0.8310, lr_0 = 6.1554e-04
Loss = 1.2188e-01, PNorm = 56.6617, GNorm = 1.0327, lr_0 = 6.1512e-04
Loss = 1.0560e-01, PNorm = 56.6807, GNorm = 0.5696, lr_0 = 6.1470e-04
Loss = 1.0638e-01, PNorm = 56.6948, GNorm = 0.9090, lr_0 = 6.1428e-04
Loss = 1.2193e-01, PNorm = 56.7063, GNorm = 0.6285, lr_0 = 6.1385e-04
Loss = 9.7826e-02, PNorm = 56.7167, GNorm = 0.7221, lr_0 = 6.1343e-04
Loss = 1.1173e-01, PNorm = 56.7287, GNorm = 1.1004, lr_0 = 6.1301e-04
Loss = 1.1228e-01, PNorm = 56.7396, GNorm = 0.5862, lr_0 = 6.1259e-04
Loss = 1.0551e-01, PNorm = 56.7493, GNorm = 0.8694, lr_0 = 6.1217e-04
Loss = 1.2197e-01, PNorm = 56.7597, GNorm = 0.7129, lr_0 = 6.1175e-04
Loss = 1.1304e-01, PNorm = 56.7732, GNorm = 0.9400, lr_0 = 6.1134e-04
Loss = 1.0679e-01, PNorm = 56.7874, GNorm = 0.8334, lr_0 = 6.1092e-04
Loss = 1.3203e-01, PNorm = 56.8015, GNorm = 0.6756, lr_0 = 6.1050e-04
Validation mae = 0.427294
Epoch 8
Loss = 1.1070e-01, PNorm = 56.8182, GNorm = 1.2795, lr_0 = 6.1008e-04
Loss = 1.2223e-01, PNorm = 56.8361, GNorm = 1.6068, lr_0 = 6.0966e-04
Loss = 1.2869e-01, PNorm = 56.8563, GNorm = 1.0846, lr_0 = 6.0924e-04
Loss = 9.8677e-02, PNorm = 56.8701, GNorm = 1.2310, lr_0 = 6.0883e-04
Loss = 1.0044e-01, PNorm = 56.8827, GNorm = 0.5958, lr_0 = 6.0841e-04
Loss = 1.1707e-01, PNorm = 56.8945, GNorm = 0.8087, lr_0 = 6.0799e-04
Loss = 1.0451e-01, PNorm = 56.9063, GNorm = 0.7658, lr_0 = 6.0758e-04
Loss = 1.0802e-01, PNorm = 56.9203, GNorm = 0.7047, lr_0 = 6.0716e-04
Loss = 1.0359e-01, PNorm = 56.9362, GNorm = 0.7554, lr_0 = 6.0674e-04
Loss = 9.1623e-02, PNorm = 56.9493, GNorm = 0.9251, lr_0 = 6.0633e-04
Loss = 1.0924e-01, PNorm = 56.9610, GNorm = 1.3036, lr_0 = 6.0591e-04
Loss = 1.2975e-01, PNorm = 56.9771, GNorm = 0.7135, lr_0 = 6.0550e-04
Loss = 1.0672e-01, PNorm = 56.9958, GNorm = 0.8617, lr_0 = 6.0508e-04
Loss = 1.0540e-01, PNorm = 57.0106, GNorm = 0.6595, lr_0 = 6.0467e-04
Loss = 1.0593e-01, PNorm = 57.0249, GNorm = 0.5149, lr_0 = 6.0425e-04
Loss = 1.0803e-01, PNorm = 57.0395, GNorm = 0.9865, lr_0 = 6.0384e-04
Loss = 9.8169e-02, PNorm = 57.0557, GNorm = 0.4671, lr_0 = 6.0343e-04
Loss = 9.0367e-02, PNorm = 57.0688, GNorm = 0.8357, lr_0 = 6.0301e-04
Loss = 1.1692e-01, PNorm = 57.0808, GNorm = 0.7756, lr_0 = 6.0260e-04
Loss = 9.7600e-02, PNorm = 57.0977, GNorm = 0.5329, lr_0 = 6.0219e-04
Loss = 1.0053e-01, PNorm = 57.1107, GNorm = 0.7421, lr_0 = 6.0178e-04
Loss = 1.0434e-01, PNorm = 57.1247, GNorm = 0.6090, lr_0 = 6.0136e-04
Loss = 9.6134e-02, PNorm = 57.1386, GNorm = 0.6627, lr_0 = 6.0095e-04
Loss = 1.0414e-01, PNorm = 57.1531, GNorm = 2.0743, lr_0 = 6.0054e-04
Loss = 1.2475e-01, PNorm = 57.1681, GNorm = 0.5772, lr_0 = 6.0013e-04
Loss = 1.1751e-01, PNorm = 57.1895, GNorm = 0.9699, lr_0 = 5.9972e-04
Loss = 1.1949e-01, PNorm = 57.2117, GNorm = 0.8682, lr_0 = 5.9931e-04
Loss = 1.1116e-01, PNorm = 57.2355, GNorm = 0.5433, lr_0 = 5.9890e-04
Loss = 9.9147e-02, PNorm = 57.2508, GNorm = 0.7092, lr_0 = 5.9849e-04
Loss = 1.1118e-01, PNorm = 57.2633, GNorm = 0.6165, lr_0 = 5.9808e-04
Loss = 9.5653e-02, PNorm = 57.2775, GNorm = 0.6773, lr_0 = 5.9767e-04
Loss = 1.2697e-01, PNorm = 57.2913, GNorm = 1.0377, lr_0 = 5.9726e-04
Loss = 1.2298e-01, PNorm = 57.3091, GNorm = 1.5733, lr_0 = 5.9685e-04
Loss = 9.9994e-02, PNorm = 57.3303, GNorm = 0.8678, lr_0 = 5.9644e-04
Loss = 1.1357e-01, PNorm = 57.3460, GNorm = 0.6135, lr_0 = 5.9603e-04
Loss = 1.0912e-01, PNorm = 57.3630, GNorm = 0.4712, lr_0 = 5.9562e-04
Loss = 9.7500e-02, PNorm = 57.3792, GNorm = 0.5829, lr_0 = 5.9521e-04
Loss = 1.0851e-01, PNorm = 57.3882, GNorm = 0.6889, lr_0 = 5.9481e-04
Loss = 1.0691e-01, PNorm = 57.4035, GNorm = 1.1042, lr_0 = 5.9440e-04
Loss = 1.1379e-01, PNorm = 57.4162, GNorm = 1.0196, lr_0 = 5.9399e-04
Loss = 1.2097e-01, PNorm = 57.4234, GNorm = 0.5528, lr_0 = 5.9358e-04
Loss = 1.4785e-01, PNorm = 57.4403, GNorm = 1.8012, lr_0 = 5.9318e-04
Loss = 1.2059e-01, PNorm = 57.4643, GNorm = 1.4084, lr_0 = 5.9277e-04
Loss = 1.0771e-01, PNorm = 57.4819, GNorm = 0.7287, lr_0 = 5.9236e-04
Loss = 1.1514e-01, PNorm = 57.4942, GNorm = 0.8033, lr_0 = 5.9196e-04
Loss = 1.0057e-01, PNorm = 57.5085, GNorm = 1.1456, lr_0 = 5.9155e-04
Loss = 1.1268e-01, PNorm = 57.5208, GNorm = 0.6697, lr_0 = 5.9115e-04
Loss = 1.1397e-01, PNorm = 57.5325, GNorm = 0.6591, lr_0 = 5.9074e-04
Loss = 1.1235e-01, PNorm = 57.5459, GNorm = 0.9352, lr_0 = 5.9034e-04
Loss = 9.6256e-02, PNorm = 57.5603, GNorm = 0.6958, lr_0 = 5.8993e-04
Loss = 1.1628e-01, PNorm = 57.5794, GNorm = 0.5334, lr_0 = 5.8953e-04
Loss = 1.0389e-01, PNorm = 57.5909, GNorm = 0.5241, lr_0 = 5.8913e-04
Loss = 1.1120e-01, PNorm = 57.5979, GNorm = 1.2853, lr_0 = 5.8872e-04
Loss = 1.1285e-01, PNorm = 57.6087, GNorm = 0.7415, lr_0 = 5.8832e-04
Loss = 1.0502e-01, PNorm = 57.6222, GNorm = 0.9923, lr_0 = 5.8792e-04
Loss = 1.1322e-01, PNorm = 57.6381, GNorm = 1.1976, lr_0 = 5.8751e-04
Loss = 1.2591e-01, PNorm = 57.6571, GNorm = 0.8913, lr_0 = 5.8711e-04
Loss = 1.0753e-01, PNorm = 57.6723, GNorm = 1.0242, lr_0 = 5.8671e-04
Loss = 1.1258e-01, PNorm = 57.6853, GNorm = 1.6029, lr_0 = 5.8631e-04
Loss = 1.1712e-01, PNorm = 57.6980, GNorm = 0.8624, lr_0 = 5.8591e-04
Loss = 1.0164e-01, PNorm = 57.7084, GNorm = 0.5028, lr_0 = 5.8550e-04
Loss = 1.0363e-01, PNorm = 57.7245, GNorm = 0.6289, lr_0 = 5.8510e-04
Loss = 9.7672e-02, PNorm = 57.7382, GNorm = 0.7028, lr_0 = 5.8470e-04
Loss = 1.2163e-01, PNorm = 57.7546, GNorm = 0.5241, lr_0 = 5.8430e-04
Loss = 9.4851e-02, PNorm = 57.7662, GNorm = 0.8948, lr_0 = 5.8390e-04
Loss = 1.0983e-01, PNorm = 57.7811, GNorm = 1.3960, lr_0 = 5.8350e-04
Loss = 1.0149e-01, PNorm = 57.7938, GNorm = 1.1891, lr_0 = 5.8310e-04
Loss = 1.0648e-01, PNorm = 57.8048, GNorm = 0.7190, lr_0 = 5.8270e-04
Loss = 1.1693e-01, PNorm = 57.8177, GNorm = 0.6763, lr_0 = 5.8230e-04
Loss = 1.2319e-01, PNorm = 57.8327, GNorm = 1.6150, lr_0 = 5.8190e-04
Loss = 1.2896e-01, PNorm = 57.8485, GNorm = 1.7453, lr_0 = 5.8151e-04
Loss = 1.2043e-01, PNorm = 57.8664, GNorm = 0.7173, lr_0 = 5.8111e-04
Loss = 1.0732e-01, PNorm = 57.8824, GNorm = 0.5800, lr_0 = 5.8071e-04
Loss = 1.0688e-01, PNorm = 57.8949, GNorm = 0.8527, lr_0 = 5.8031e-04
Loss = 1.0051e-01, PNorm = 57.9120, GNorm = 0.6019, lr_0 = 5.7991e-04
Loss = 1.1506e-01, PNorm = 57.9239, GNorm = 0.7170, lr_0 = 5.7952e-04
Loss = 1.0049e-01, PNorm = 57.9338, GNorm = 0.5969, lr_0 = 5.7912e-04
Loss = 1.0891e-01, PNorm = 57.9461, GNorm = 0.7099, lr_0 = 5.7872e-04
Loss = 1.0383e-01, PNorm = 57.9587, GNorm = 0.4151, lr_0 = 5.7833e-04
Loss = 1.0784e-01, PNorm = 57.9682, GNorm = 0.7045, lr_0 = 5.7793e-04
Loss = 9.6411e-02, PNorm = 57.9787, GNorm = 0.8152, lr_0 = 5.7753e-04
Loss = 1.1491e-01, PNorm = 57.9901, GNorm = 0.8052, lr_0 = 5.7714e-04
Loss = 9.3338e-02, PNorm = 58.0088, GNorm = 0.6122, lr_0 = 5.7674e-04
Loss = 1.1278e-01, PNorm = 58.0236, GNorm = 0.5753, lr_0 = 5.7635e-04
Loss = 1.1087e-01, PNorm = 58.0361, GNorm = 0.7931, lr_0 = 5.7595e-04
Loss = 1.2008e-01, PNorm = 58.0545, GNorm = 1.0955, lr_0 = 5.7556e-04
Loss = 1.0771e-01, PNorm = 58.0717, GNorm = 0.8401, lr_0 = 5.7516e-04
Loss = 1.0192e-01, PNorm = 58.0840, GNorm = 0.7213, lr_0 = 5.7477e-04
Loss = 9.6688e-02, PNorm = 58.0930, GNorm = 0.7623, lr_0 = 5.7438e-04
Loss = 1.2620e-01, PNorm = 58.0970, GNorm = 0.9572, lr_0 = 5.7398e-04
Loss = 1.0894e-01, PNorm = 58.1069, GNorm = 0.8648, lr_0 = 5.7359e-04
Loss = 1.2223e-01, PNorm = 58.1212, GNorm = 0.7656, lr_0 = 5.7320e-04
Loss = 1.0735e-01, PNorm = 58.1389, GNorm = 1.0503, lr_0 = 5.7280e-04
Loss = 1.1855e-01, PNorm = 58.1500, GNorm = 1.0333, lr_0 = 5.7241e-04
Loss = 1.2169e-01, PNorm = 58.1633, GNorm = 0.6572, lr_0 = 5.7202e-04
Loss = 1.1257e-01, PNorm = 58.1759, GNorm = 0.7456, lr_0 = 5.7163e-04
Loss = 1.1949e-01, PNorm = 58.1893, GNorm = 0.7063, lr_0 = 5.7124e-04
Loss = 1.1152e-01, PNorm = 58.2061, GNorm = 0.8230, lr_0 = 5.7084e-04
Loss = 1.2459e-01, PNorm = 58.2193, GNorm = 0.9275, lr_0 = 5.7045e-04
Loss = 1.2172e-01, PNorm = 58.2367, GNorm = 0.8168, lr_0 = 5.7006e-04
Loss = 9.5298e-02, PNorm = 58.2541, GNorm = 0.6736, lr_0 = 5.6967e-04
Loss = 9.7594e-02, PNorm = 58.2631, GNorm = 0.7147, lr_0 = 5.6928e-04
Loss = 1.0752e-01, PNorm = 58.2711, GNorm = 0.7652, lr_0 = 5.6889e-04
Loss = 1.0913e-01, PNorm = 58.2857, GNorm = 1.0590, lr_0 = 5.6850e-04
Loss = 1.0643e-01, PNorm = 58.2994, GNorm = 0.4574, lr_0 = 5.6811e-04
Loss = 1.0701e-01, PNorm = 58.3086, GNorm = 0.8665, lr_0 = 5.6772e-04
Loss = 1.1821e-01, PNorm = 58.3232, GNorm = 0.8431, lr_0 = 5.6733e-04
Loss = 1.1405e-01, PNorm = 58.3347, GNorm = 1.1441, lr_0 = 5.6695e-04
Loss = 1.0710e-01, PNorm = 58.3527, GNorm = 0.9297, lr_0 = 5.6656e-04
Loss = 1.1576e-01, PNorm = 58.3779, GNorm = 0.9081, lr_0 = 5.6617e-04
Loss = 1.2438e-01, PNorm = 58.3961, GNorm = 0.5326, lr_0 = 5.6578e-04
Loss = 1.0748e-01, PNorm = 58.4105, GNorm = 0.5541, lr_0 = 5.6539e-04
Loss = 1.1193e-01, PNorm = 58.4228, GNorm = 0.6626, lr_0 = 5.6501e-04
Loss = 1.0224e-01, PNorm = 58.4336, GNorm = 1.2170, lr_0 = 5.6462e-04
Loss = 1.3120e-01, PNorm = 58.4574, GNorm = 1.2372, lr_0 = 5.6423e-04
Loss = 1.0328e-01, PNorm = 58.4748, GNorm = 0.6911, lr_0 = 5.6385e-04
Loss = 9.8939e-02, PNorm = 58.4858, GNorm = 0.4615, lr_0 = 5.6346e-04
Loss = 1.1691e-01, PNorm = 58.4940, GNorm = 0.7028, lr_0 = 5.6307e-04
Loss = 1.0456e-01, PNorm = 58.5004, GNorm = 0.5043, lr_0 = 5.6269e-04
Loss = 1.1595e-01, PNorm = 58.5104, GNorm = 0.9555, lr_0 = 5.6230e-04
Validation mae = 0.431175
Epoch 9
Loss = 8.7361e-02, PNorm = 58.5278, GNorm = 0.4754, lr_0 = 5.6192e-04
Loss = 1.0548e-01, PNorm = 58.5472, GNorm = 0.5550, lr_0 = 5.6153e-04
Loss = 8.6334e-02, PNorm = 58.5629, GNorm = 0.6419, lr_0 = 5.6115e-04
Loss = 8.5283e-02, PNorm = 58.5782, GNorm = 0.5745, lr_0 = 5.6076e-04
Loss = 7.7921e-02, PNorm = 58.5918, GNorm = 0.5806, lr_0 = 5.6038e-04
Loss = 1.0219e-01, PNorm = 58.6098, GNorm = 0.9820, lr_0 = 5.6000e-04
Loss = 1.1285e-01, PNorm = 58.6246, GNorm = 0.8754, lr_0 = 5.5961e-04
Loss = 9.5696e-02, PNorm = 58.6392, GNorm = 1.1376, lr_0 = 5.5923e-04
Loss = 1.0672e-01, PNorm = 58.6565, GNorm = 0.6378, lr_0 = 5.5885e-04
Loss = 1.0831e-01, PNorm = 58.6715, GNorm = 0.6740, lr_0 = 5.5846e-04
Loss = 1.0309e-01, PNorm = 58.6872, GNorm = 0.4860, lr_0 = 5.5808e-04
Loss = 1.0232e-01, PNorm = 58.7007, GNorm = 0.6838, lr_0 = 5.5770e-04
Loss = 1.0274e-01, PNorm = 58.7135, GNorm = 0.6906, lr_0 = 5.5732e-04
Loss = 1.1013e-01, PNorm = 58.7286, GNorm = 1.2314, lr_0 = 5.5693e-04
Loss = 1.0144e-01, PNorm = 58.7490, GNorm = 0.6072, lr_0 = 5.5655e-04
Loss = 9.8890e-02, PNorm = 58.7664, GNorm = 1.3276, lr_0 = 5.5617e-04
Loss = 1.0447e-01, PNorm = 58.7813, GNorm = 0.8207, lr_0 = 5.5579e-04
Loss = 1.1413e-01, PNorm = 58.8000, GNorm = 0.8628, lr_0 = 5.5541e-04
Loss = 9.4048e-02, PNorm = 58.8093, GNorm = 0.6851, lr_0 = 5.5503e-04
Loss = 1.0354e-01, PNorm = 58.8159, GNorm = 0.7736, lr_0 = 5.5465e-04
Loss = 1.0064e-01, PNorm = 58.8235, GNorm = 0.5771, lr_0 = 5.5427e-04
Loss = 1.0490e-01, PNorm = 58.8335, GNorm = 1.1641, lr_0 = 5.5389e-04
Loss = 9.3372e-02, PNorm = 58.8499, GNorm = 0.7048, lr_0 = 5.5351e-04
Loss = 8.6686e-02, PNorm = 58.8667, GNorm = 0.6309, lr_0 = 5.5313e-04
Loss = 1.1108e-01, PNorm = 58.8864, GNorm = 0.7818, lr_0 = 5.5275e-04
Loss = 1.1509e-01, PNorm = 58.9080, GNorm = 0.9930, lr_0 = 5.5237e-04
Loss = 9.7872e-02, PNorm = 58.9253, GNorm = 0.9134, lr_0 = 5.5199e-04
Loss = 1.0679e-01, PNorm = 58.9415, GNorm = 0.5745, lr_0 = 5.5162e-04
Loss = 1.0482e-01, PNorm = 58.9578, GNorm = 0.9423, lr_0 = 5.5124e-04
Loss = 1.0545e-01, PNorm = 58.9700, GNorm = 0.5897, lr_0 = 5.5086e-04
Loss = 9.2142e-02, PNorm = 58.9820, GNorm = 0.9493, lr_0 = 5.5048e-04
Loss = 1.1982e-01, PNorm = 58.9982, GNorm = 1.0584, lr_0 = 5.5011e-04
Loss = 9.6513e-02, PNorm = 59.0108, GNorm = 0.9001, lr_0 = 5.4973e-04
Loss = 1.0641e-01, PNorm = 59.0291, GNorm = 0.9138, lr_0 = 5.4935e-04
Loss = 1.0278e-01, PNorm = 59.0478, GNorm = 1.2368, lr_0 = 5.4898e-04
Loss = 1.2100e-01, PNorm = 59.0633, GNorm = 0.8252, lr_0 = 5.4860e-04
Loss = 1.0258e-01, PNorm = 59.0740, GNorm = 0.9314, lr_0 = 5.4822e-04
Loss = 1.0999e-01, PNorm = 59.0877, GNorm = 1.2253, lr_0 = 5.4785e-04
Loss = 9.6657e-02, PNorm = 59.1062, GNorm = 0.4935, lr_0 = 5.4747e-04
Loss = 1.1780e-01, PNorm = 59.1207, GNorm = 0.8694, lr_0 = 5.4710e-04
Loss = 9.5363e-02, PNorm = 59.1331, GNorm = 0.5592, lr_0 = 5.4672e-04
Loss = 1.0536e-01, PNorm = 59.1407, GNorm = 0.8561, lr_0 = 5.4635e-04
Loss = 1.0294e-01, PNorm = 59.1474, GNorm = 0.7194, lr_0 = 5.4597e-04
Loss = 1.1003e-01, PNorm = 59.1572, GNorm = 2.2043, lr_0 = 5.4560e-04
Loss = 1.1161e-01, PNorm = 59.1704, GNorm = 0.5640, lr_0 = 5.4523e-04
Loss = 1.0056e-01, PNorm = 59.1835, GNorm = 0.9347, lr_0 = 5.4485e-04
Loss = 8.5226e-02, PNorm = 59.1950, GNorm = 0.7679, lr_0 = 5.4448e-04
Loss = 1.1533e-01, PNorm = 59.2104, GNorm = 0.6219, lr_0 = 5.4411e-04
Loss = 9.9511e-02, PNorm = 59.2282, GNorm = 0.6934, lr_0 = 5.4373e-04
Loss = 8.8127e-02, PNorm = 59.2392, GNorm = 0.6712, lr_0 = 5.4336e-04
Loss = 8.6496e-02, PNorm = 59.2489, GNorm = 0.3809, lr_0 = 5.4299e-04
Loss = 1.1157e-01, PNorm = 59.2590, GNorm = 0.6262, lr_0 = 5.4262e-04
Loss = 1.1287e-01, PNorm = 59.2693, GNorm = 0.5033, lr_0 = 5.4225e-04
Loss = 7.9742e-02, PNorm = 59.2828, GNorm = 1.1468, lr_0 = 5.4187e-04
Loss = 9.5628e-02, PNorm = 59.2901, GNorm = 0.7441, lr_0 = 5.4150e-04
Loss = 9.9224e-02, PNorm = 59.3046, GNorm = 0.8335, lr_0 = 5.4113e-04
Loss = 9.5811e-02, PNorm = 59.3192, GNorm = 0.6958, lr_0 = 5.4076e-04
Loss = 9.4551e-02, PNorm = 59.3241, GNorm = 0.8976, lr_0 = 5.4039e-04
Loss = 1.0303e-01, PNorm = 59.3323, GNorm = 0.6241, lr_0 = 5.4002e-04
Loss = 1.1044e-01, PNorm = 59.3470, GNorm = 0.7660, lr_0 = 5.3965e-04
Loss = 1.2216e-01, PNorm = 59.3641, GNorm = 1.2971, lr_0 = 5.3928e-04
Loss = 1.0592e-01, PNorm = 59.3803, GNorm = 1.2692, lr_0 = 5.3891e-04
Loss = 9.4946e-02, PNorm = 59.3896, GNorm = 0.7196, lr_0 = 5.3854e-04
Loss = 1.2355e-01, PNorm = 59.3961, GNorm = 1.0291, lr_0 = 5.3817e-04
Loss = 9.9216e-02, PNorm = 59.4124, GNorm = 0.9798, lr_0 = 5.3781e-04
Loss = 1.1336e-01, PNorm = 59.4304, GNorm = 1.9931, lr_0 = 5.3744e-04
Loss = 1.0041e-01, PNorm = 59.4449, GNorm = 1.6491, lr_0 = 5.3707e-04
Loss = 1.2177e-01, PNorm = 59.4582, GNorm = 2.4350, lr_0 = 5.3670e-04
Loss = 1.1206e-01, PNorm = 59.4771, GNorm = 1.8922, lr_0 = 5.3633e-04
Loss = 9.3882e-02, PNorm = 59.4948, GNorm = 0.8241, lr_0 = 5.3597e-04
Loss = 9.5044e-02, PNorm = 59.5127, GNorm = 0.5291, lr_0 = 5.3560e-04
Loss = 1.0329e-01, PNorm = 59.5288, GNorm = 0.8273, lr_0 = 5.3523e-04
Loss = 1.0201e-01, PNorm = 59.5424, GNorm = 0.9863, lr_0 = 5.3486e-04
Loss = 1.1304e-01, PNorm = 59.5554, GNorm = 1.3548, lr_0 = 5.3450e-04
Loss = 1.0007e-01, PNorm = 59.5712, GNorm = 1.0699, lr_0 = 5.3413e-04
Loss = 9.2289e-02, PNorm = 59.5862, GNorm = 0.8231, lr_0 = 5.3377e-04
Loss = 9.8020e-02, PNorm = 59.5990, GNorm = 1.0022, lr_0 = 5.3340e-04
Loss = 9.7232e-02, PNorm = 59.6124, GNorm = 0.5600, lr_0 = 5.3304e-04
Loss = 8.9771e-02, PNorm = 59.6238, GNorm = 0.6414, lr_0 = 5.3267e-04
Loss = 1.0086e-01, PNorm = 59.6339, GNorm = 0.5439, lr_0 = 5.3231e-04
Loss = 9.2940e-02, PNorm = 59.6433, GNorm = 0.7330, lr_0 = 5.3194e-04
Loss = 9.1917e-02, PNorm = 59.6515, GNorm = 0.9358, lr_0 = 5.3158e-04
Loss = 1.1060e-01, PNorm = 59.6644, GNorm = 1.0904, lr_0 = 5.3121e-04
Loss = 1.0127e-01, PNorm = 59.6793, GNorm = 0.4755, lr_0 = 5.3085e-04
Loss = 9.1959e-02, PNorm = 59.6913, GNorm = 0.6958, lr_0 = 5.3048e-04
Loss = 8.4929e-02, PNorm = 59.7001, GNorm = 0.6529, lr_0 = 5.3012e-04
Loss = 9.6514e-02, PNorm = 59.7079, GNorm = 0.7484, lr_0 = 5.2976e-04
Loss = 1.1170e-01, PNorm = 59.7223, GNorm = 0.5360, lr_0 = 5.2939e-04
Loss = 1.1359e-01, PNorm = 59.7400, GNorm = 0.7207, lr_0 = 5.2903e-04
Loss = 1.0335e-01, PNorm = 59.7597, GNorm = 0.5966, lr_0 = 5.2867e-04
Loss = 1.2514e-01, PNorm = 59.7853, GNorm = 0.6823, lr_0 = 5.2831e-04
Loss = 1.0481e-01, PNorm = 59.8061, GNorm = 0.6898, lr_0 = 5.2795e-04
Loss = 9.9798e-02, PNorm = 59.8167, GNorm = 1.4202, lr_0 = 5.2758e-04
Loss = 1.0612e-01, PNorm = 59.8253, GNorm = 0.7083, lr_0 = 5.2722e-04
Loss = 8.9997e-02, PNorm = 59.8350, GNorm = 0.6296, lr_0 = 5.2686e-04
Loss = 1.1679e-01, PNorm = 59.8476, GNorm = 0.6787, lr_0 = 5.2650e-04
Loss = 8.9253e-02, PNorm = 59.8580, GNorm = 0.5124, lr_0 = 5.2614e-04
Loss = 9.8332e-02, PNorm = 59.8685, GNorm = 1.1095, lr_0 = 5.2578e-04
Loss = 1.1042e-01, PNorm = 59.8826, GNorm = 0.7279, lr_0 = 5.2542e-04
Loss = 1.0197e-01, PNorm = 59.8940, GNorm = 0.5612, lr_0 = 5.2506e-04
Loss = 1.0337e-01, PNorm = 59.9016, GNorm = 0.9080, lr_0 = 5.2470e-04
Loss = 1.0319e-01, PNorm = 59.9122, GNorm = 0.6719, lr_0 = 5.2434e-04
Loss = 1.0080e-01, PNorm = 59.9269, GNorm = 0.9899, lr_0 = 5.2398e-04
Loss = 1.0238e-01, PNorm = 59.9368, GNorm = 0.6170, lr_0 = 5.2362e-04
Loss = 9.9102e-02, PNorm = 59.9483, GNorm = 0.4975, lr_0 = 5.2326e-04
Loss = 1.1038e-01, PNorm = 59.9652, GNorm = 1.1042, lr_0 = 5.2290e-04
Loss = 9.8633e-02, PNorm = 59.9801, GNorm = 0.8090, lr_0 = 5.2255e-04
Loss = 1.0770e-01, PNorm = 59.9922, GNorm = 1.0009, lr_0 = 5.2219e-04
Loss = 9.4442e-02, PNorm = 60.0059, GNorm = 0.4620, lr_0 = 5.2183e-04
Loss = 9.6216e-02, PNorm = 60.0144, GNorm = 0.5701, lr_0 = 5.2147e-04
Loss = 8.6696e-02, PNorm = 60.0251, GNorm = 0.6097, lr_0 = 5.2112e-04
Loss = 1.0698e-01, PNorm = 60.0426, GNorm = 0.8586, lr_0 = 5.2076e-04
Loss = 1.0428e-01, PNorm = 60.0547, GNorm = 0.6326, lr_0 = 5.2040e-04
Loss = 9.1205e-02, PNorm = 60.0638, GNorm = 0.9109, lr_0 = 5.2005e-04
Loss = 1.0448e-01, PNorm = 60.0763, GNorm = 0.7825, lr_0 = 5.1969e-04
Loss = 1.0033e-01, PNorm = 60.0905, GNorm = 0.7535, lr_0 = 5.1933e-04
Loss = 1.0490e-01, PNorm = 60.1033, GNorm = 0.9257, lr_0 = 5.1898e-04
Loss = 1.0055e-01, PNorm = 60.1169, GNorm = 0.6993, lr_0 = 5.1862e-04
Loss = 8.8410e-02, PNorm = 60.1278, GNorm = 0.7889, lr_0 = 5.1827e-04
Loss = 9.3268e-02, PNorm = 60.1400, GNorm = 0.6729, lr_0 = 5.1791e-04
Validation mae = 0.410072
Epoch 10
Loss = 1.0117e-01, PNorm = 60.1532, GNorm = 1.2590, lr_0 = 5.1756e-04
Loss = 8.4988e-02, PNorm = 60.1616, GNorm = 0.8228, lr_0 = 5.1720e-04
Loss = 9.7563e-02, PNorm = 60.1747, GNorm = 0.5974, lr_0 = 5.1685e-04
Loss = 7.9978e-02, PNorm = 60.1878, GNorm = 0.9273, lr_0 = 5.1649e-04
Loss = 9.1705e-02, PNorm = 60.1989, GNorm = 0.6501, lr_0 = 5.1614e-04
Loss = 7.5415e-02, PNorm = 60.2087, GNorm = 1.3788, lr_0 = 5.1579e-04
Loss = 8.8404e-02, PNorm = 60.2211, GNorm = 0.9314, lr_0 = 5.1543e-04
Loss = 8.9670e-02, PNorm = 60.2373, GNorm = 0.4264, lr_0 = 5.1508e-04
Loss = 8.9182e-02, PNorm = 60.2469, GNorm = 0.5070, lr_0 = 5.1473e-04
Loss = 8.8552e-02, PNorm = 60.2602, GNorm = 0.9218, lr_0 = 5.1437e-04
Loss = 1.0846e-01, PNorm = 60.2754, GNorm = 1.3190, lr_0 = 5.1402e-04
Loss = 8.7210e-02, PNorm = 60.2934, GNorm = 0.4423, lr_0 = 5.1367e-04
Loss = 1.1337e-01, PNorm = 60.3033, GNorm = 1.2905, lr_0 = 5.1332e-04
Loss = 1.0063e-01, PNorm = 60.3119, GNorm = 1.1255, lr_0 = 5.1297e-04
Loss = 9.9135e-02, PNorm = 60.3238, GNorm = 0.6968, lr_0 = 5.1262e-04
Loss = 1.0697e-01, PNorm = 60.3374, GNorm = 0.8573, lr_0 = 5.1226e-04
Loss = 1.0046e-01, PNorm = 60.3523, GNorm = 0.4952, lr_0 = 5.1191e-04
Loss = 9.3828e-02, PNorm = 60.3642, GNorm = 0.4894, lr_0 = 5.1156e-04
Loss = 8.6995e-02, PNorm = 60.3794, GNorm = 0.4794, lr_0 = 5.1121e-04
Loss = 8.2798e-02, PNorm = 60.3897, GNorm = 1.1172, lr_0 = 5.1086e-04
Loss = 8.9011e-02, PNorm = 60.3985, GNorm = 0.7148, lr_0 = 5.1051e-04
Loss = 8.7700e-02, PNorm = 60.4108, GNorm = 0.5468, lr_0 = 5.1016e-04
Loss = 9.9719e-02, PNorm = 60.4265, GNorm = 0.7356, lr_0 = 5.0981e-04
Loss = 9.3000e-02, PNorm = 60.4392, GNorm = 0.7650, lr_0 = 5.0946e-04
Loss = 9.7254e-02, PNorm = 60.4499, GNorm = 0.7077, lr_0 = 5.0911e-04
Loss = 1.0751e-01, PNorm = 60.4659, GNorm = 0.4611, lr_0 = 5.0877e-04
Loss = 9.1667e-02, PNorm = 60.4845, GNorm = 0.7258, lr_0 = 5.0842e-04
Loss = 8.4412e-02, PNorm = 60.4994, GNorm = 0.3948, lr_0 = 5.0807e-04
Loss = 9.4092e-02, PNorm = 60.5061, GNorm = 0.7245, lr_0 = 5.0772e-04
Loss = 9.1968e-02, PNorm = 60.5145, GNorm = 1.1631, lr_0 = 5.0737e-04
Loss = 9.0204e-02, PNorm = 60.5253, GNorm = 1.2066, lr_0 = 5.0703e-04
Loss = 8.6790e-02, PNorm = 60.5410, GNorm = 0.8206, lr_0 = 5.0668e-04
Loss = 1.1032e-01, PNorm = 60.5585, GNorm = 1.2898, lr_0 = 5.0633e-04
Loss = 9.6696e-02, PNorm = 60.5686, GNorm = 0.5434, lr_0 = 5.0598e-04
Loss = 9.1498e-02, PNorm = 60.5778, GNorm = 0.7907, lr_0 = 5.0564e-04
Loss = 9.5857e-02, PNorm = 60.5927, GNorm = 1.2276, lr_0 = 5.0529e-04
Loss = 8.8268e-02, PNorm = 60.6046, GNorm = 0.6859, lr_0 = 5.0494e-04
Loss = 9.9128e-02, PNorm = 60.6189, GNorm = 1.2649, lr_0 = 5.0460e-04
Loss = 8.7198e-02, PNorm = 60.6309, GNorm = 0.7649, lr_0 = 5.0425e-04
Loss = 9.8919e-02, PNorm = 60.6418, GNorm = 0.6058, lr_0 = 5.0391e-04
Loss = 8.2970e-02, PNorm = 60.6540, GNorm = 0.7039, lr_0 = 5.0356e-04
Loss = 7.4012e-02, PNorm = 60.6631, GNorm = 0.4659, lr_0 = 5.0322e-04
Loss = 9.5674e-02, PNorm = 60.6722, GNorm = 0.4840, lr_0 = 5.0287e-04
Loss = 1.0616e-01, PNorm = 60.6865, GNorm = 0.6863, lr_0 = 5.0253e-04
Loss = 1.0277e-01, PNorm = 60.6992, GNorm = 0.6182, lr_0 = 5.0218e-04
Loss = 8.7780e-02, PNorm = 60.7118, GNorm = 1.0637, lr_0 = 5.0184e-04
Loss = 1.0214e-01, PNorm = 60.7245, GNorm = 0.5813, lr_0 = 5.0150e-04
Loss = 1.0265e-01, PNorm = 60.7356, GNorm = 0.4781, lr_0 = 5.0115e-04
Loss = 8.8670e-02, PNorm = 60.7479, GNorm = 0.6707, lr_0 = 5.0081e-04
Loss = 9.0166e-02, PNorm = 60.7577, GNorm = 0.7117, lr_0 = 5.0047e-04
Loss = 9.4338e-02, PNorm = 60.7648, GNorm = 0.6238, lr_0 = 5.0012e-04
Loss = 1.0250e-01, PNorm = 60.7745, GNorm = 0.6075, lr_0 = 4.9978e-04
Loss = 1.0590e-01, PNorm = 60.7875, GNorm = 0.5690, lr_0 = 4.9944e-04
Loss = 9.8819e-02, PNorm = 60.7946, GNorm = 0.6044, lr_0 = 4.9910e-04
Loss = 9.7372e-02, PNorm = 60.8051, GNorm = 0.5633, lr_0 = 4.9875e-04
Loss = 8.7397e-02, PNorm = 60.8177, GNorm = 0.6836, lr_0 = 4.9841e-04
Loss = 8.5287e-02, PNorm = 60.8320, GNorm = 0.5796, lr_0 = 4.9807e-04
Loss = 1.0427e-01, PNorm = 60.8448, GNorm = 0.8001, lr_0 = 4.9773e-04
Loss = 1.0080e-01, PNorm = 60.8601, GNorm = 0.9434, lr_0 = 4.9739e-04
Loss = 8.7141e-02, PNorm = 60.8727, GNorm = 0.5588, lr_0 = 4.9705e-04
Loss = 8.2856e-02, PNorm = 60.8830, GNorm = 0.4151, lr_0 = 4.9671e-04
Loss = 9.5730e-02, PNorm = 60.8973, GNorm = 1.3526, lr_0 = 4.9637e-04
Loss = 1.0165e-01, PNorm = 60.9107, GNorm = 0.8891, lr_0 = 4.9603e-04
Loss = 9.5732e-02, PNorm = 60.9214, GNorm = 0.8029, lr_0 = 4.9569e-04
Loss = 9.8012e-02, PNorm = 60.9311, GNorm = 0.5110, lr_0 = 4.9535e-04
Loss = 9.6065e-02, PNorm = 60.9420, GNorm = 1.2312, lr_0 = 4.9501e-04
Loss = 8.6756e-02, PNorm = 60.9550, GNorm = 0.9095, lr_0 = 4.9467e-04
Loss = 1.0794e-01, PNorm = 60.9689, GNorm = 0.5933, lr_0 = 4.9433e-04
Loss = 1.0336e-01, PNorm = 60.9838, GNorm = 1.1453, lr_0 = 4.9399e-04
Loss = 8.9290e-02, PNorm = 60.9970, GNorm = 0.7986, lr_0 = 4.9365e-04
Loss = 9.3331e-02, PNorm = 61.0072, GNorm = 0.9681, lr_0 = 4.9332e-04
Loss = 1.0007e-01, PNorm = 61.0169, GNorm = 0.7631, lr_0 = 4.9298e-04
Loss = 9.1822e-02, PNorm = 61.0286, GNorm = 0.4819, lr_0 = 4.9264e-04
Loss = 8.7916e-02, PNorm = 61.0400, GNorm = 0.5005, lr_0 = 4.9230e-04
Loss = 9.6061e-02, PNorm = 61.0508, GNorm = 0.4398, lr_0 = 4.9197e-04
Loss = 1.0124e-01, PNorm = 61.0657, GNorm = 0.8648, lr_0 = 4.9163e-04
Loss = 9.9204e-02, PNorm = 61.0809, GNorm = 1.3018, lr_0 = 4.9129e-04
Loss = 1.0731e-01, PNorm = 61.0979, GNorm = 0.5273, lr_0 = 4.9095e-04
Loss = 9.1593e-02, PNorm = 61.1092, GNorm = 1.7770, lr_0 = 4.9062e-04
Loss = 1.0457e-01, PNorm = 61.1215, GNorm = 1.0774, lr_0 = 4.9028e-04
Loss = 9.4772e-02, PNorm = 61.1329, GNorm = 0.7886, lr_0 = 4.8995e-04
Loss = 8.8924e-02, PNorm = 61.1456, GNorm = 0.4920, lr_0 = 4.8961e-04
Loss = 9.7037e-02, PNorm = 61.1589, GNorm = 0.6357, lr_0 = 4.8928e-04
Loss = 9.4417e-02, PNorm = 61.1724, GNorm = 0.6537, lr_0 = 4.8894e-04
Loss = 9.4838e-02, PNorm = 61.1847, GNorm = 0.9210, lr_0 = 4.8861e-04
Loss = 9.9241e-02, PNorm = 61.1947, GNorm = 0.8553, lr_0 = 4.8827e-04
Loss = 1.0054e-01, PNorm = 61.2051, GNorm = 1.0608, lr_0 = 4.8794e-04
Loss = 8.4533e-02, PNorm = 61.2169, GNorm = 0.6159, lr_0 = 4.8760e-04
Loss = 1.0325e-01, PNorm = 61.2302, GNorm = 1.2607, lr_0 = 4.8727e-04
Loss = 9.6999e-02, PNorm = 61.2417, GNorm = 0.7859, lr_0 = 4.8693e-04
Loss = 9.6706e-02, PNorm = 61.2518, GNorm = 1.1331, lr_0 = 4.8660e-04
Loss = 8.6367e-02, PNorm = 61.2589, GNorm = 0.6187, lr_0 = 4.8627e-04
Loss = 9.9909e-02, PNorm = 61.2648, GNorm = 0.9187, lr_0 = 4.8593e-04
Loss = 9.8193e-02, PNorm = 61.2746, GNorm = 0.6846, lr_0 = 4.8560e-04
Loss = 9.5081e-02, PNorm = 61.2870, GNorm = 0.5797, lr_0 = 4.8527e-04
Loss = 9.9507e-02, PNorm = 61.2978, GNorm = 0.4863, lr_0 = 4.8494e-04
Loss = 8.6083e-02, PNorm = 61.3082, GNorm = 0.5691, lr_0 = 4.8460e-04
Loss = 1.0813e-01, PNorm = 61.3201, GNorm = 0.7555, lr_0 = 4.8427e-04
Loss = 9.5683e-02, PNorm = 61.3308, GNorm = 0.5805, lr_0 = 4.8394e-04
Loss = 8.9849e-02, PNorm = 61.3403, GNorm = 1.4241, lr_0 = 4.8361e-04
Loss = 1.1487e-01, PNorm = 61.3523, GNorm = 1.8130, lr_0 = 4.8328e-04
Loss = 1.0576e-01, PNorm = 61.3694, GNorm = 0.7340, lr_0 = 4.8295e-04
Loss = 8.5671e-02, PNorm = 61.3841, GNorm = 0.8990, lr_0 = 4.8262e-04
Loss = 9.3824e-02, PNorm = 61.3912, GNorm = 0.6495, lr_0 = 4.8228e-04
Loss = 9.3959e-02, PNorm = 61.3988, GNorm = 0.5380, lr_0 = 4.8195e-04
Loss = 7.2921e-02, PNorm = 61.4122, GNorm = 0.5840, lr_0 = 4.8162e-04
Loss = 9.1539e-02, PNorm = 61.4248, GNorm = 0.5606, lr_0 = 4.8129e-04
Loss = 8.6403e-02, PNorm = 61.4320, GNorm = 0.4965, lr_0 = 4.8096e-04
Loss = 9.9743e-02, PNorm = 61.4407, GNorm = 0.7529, lr_0 = 4.8064e-04
Loss = 8.0389e-02, PNorm = 61.4519, GNorm = 0.4800, lr_0 = 4.8031e-04
Loss = 9.6433e-02, PNorm = 61.4588, GNorm = 0.9955, lr_0 = 4.7998e-04
Loss = 9.3518e-02, PNorm = 61.4669, GNorm = 0.5210, lr_0 = 4.7965e-04
Loss = 8.5716e-02, PNorm = 61.4722, GNorm = 0.5348, lr_0 = 4.7932e-04
Loss = 8.6286e-02, PNorm = 61.4801, GNorm = 1.0185, lr_0 = 4.7899e-04
Loss = 9.9505e-02, PNorm = 61.4877, GNorm = 0.7370, lr_0 = 4.7866e-04
Loss = 8.0833e-02, PNorm = 61.4943, GNorm = 0.4574, lr_0 = 4.7833e-04
Loss = 1.0854e-01, PNorm = 61.5052, GNorm = 1.1860, lr_0 = 4.7801e-04
Loss = 9.5262e-02, PNorm = 61.5166, GNorm = 0.5796, lr_0 = 4.7768e-04
Loss = 9.9877e-02, PNorm = 61.5279, GNorm = 0.5542, lr_0 = 4.7735e-04
Loss = 9.1004e-02, PNorm = 61.5427, GNorm = 0.6096, lr_0 = 4.7703e-04
Validation mae = 0.397655
Epoch 11
Loss = 7.2102e-02, PNorm = 61.5549, GNorm = 0.6258, lr_0 = 4.7670e-04
Loss = 8.9260e-02, PNorm = 61.5693, GNorm = 0.6838, lr_0 = 4.7637e-04
Loss = 8.6768e-02, PNorm = 61.5854, GNorm = 1.3346, lr_0 = 4.7605e-04
Loss = 1.0408e-01, PNorm = 61.5936, GNorm = 0.6903, lr_0 = 4.7572e-04
Loss = 8.1842e-02, PNorm = 61.6005, GNorm = 0.6888, lr_0 = 4.7539e-04
Loss = 9.4501e-02, PNorm = 61.6096, GNorm = 0.5352, lr_0 = 4.7507e-04
Loss = 8.1992e-02, PNorm = 61.6171, GNorm = 0.7652, lr_0 = 4.7474e-04
Loss = 7.5475e-02, PNorm = 61.6263, GNorm = 0.7068, lr_0 = 4.7442e-04
Loss = 7.7825e-02, PNorm = 61.6394, GNorm = 0.4666, lr_0 = 4.7409e-04
Loss = 8.8518e-02, PNorm = 61.6505, GNorm = 0.5261, lr_0 = 4.7377e-04
Loss = 8.1499e-02, PNorm = 61.6575, GNorm = 0.6481, lr_0 = 4.7344e-04
Loss = 7.2046e-02, PNorm = 61.6697, GNorm = 0.5553, lr_0 = 4.7312e-04
Loss = 7.1151e-02, PNorm = 61.6826, GNorm = 0.5664, lr_0 = 4.7279e-04
Loss = 8.1036e-02, PNorm = 61.6893, GNorm = 0.8938, lr_0 = 4.7247e-04
Loss = 7.6487e-02, PNorm = 61.6978, GNorm = 0.5979, lr_0 = 4.7215e-04
Loss = 9.6348e-02, PNorm = 61.7063, GNorm = 1.1326, lr_0 = 4.7182e-04
Loss = 9.2745e-02, PNorm = 61.7179, GNorm = 1.5039, lr_0 = 4.7150e-04
Loss = 1.1106e-01, PNorm = 61.7369, GNorm = 0.9240, lr_0 = 4.7118e-04
Loss = 8.9075e-02, PNorm = 61.7489, GNorm = 0.4239, lr_0 = 4.7085e-04
Loss = 9.9743e-02, PNorm = 61.7644, GNorm = 1.7569, lr_0 = 4.7053e-04
Loss = 9.3996e-02, PNorm = 61.7781, GNorm = 0.7213, lr_0 = 4.7021e-04
Loss = 8.1615e-02, PNorm = 61.7904, GNorm = 0.5142, lr_0 = 4.6989e-04
Loss = 7.8959e-02, PNorm = 61.8004, GNorm = 0.4621, lr_0 = 4.6957e-04
Loss = 8.5367e-02, PNorm = 61.8165, GNorm = 0.5654, lr_0 = 4.6924e-04
Loss = 9.2641e-02, PNorm = 61.8331, GNorm = 0.5100, lr_0 = 4.6892e-04
Loss = 8.3565e-02, PNorm = 61.8524, GNorm = 0.5408, lr_0 = 4.6860e-04
Loss = 8.2963e-02, PNorm = 61.8683, GNorm = 0.8205, lr_0 = 4.6828e-04
Loss = 7.3200e-02, PNorm = 61.8797, GNorm = 1.0625, lr_0 = 4.6796e-04
Loss = 8.0396e-02, PNorm = 61.8953, GNorm = 0.4920, lr_0 = 4.6764e-04
Loss = 8.6487e-02, PNorm = 61.9042, GNorm = 0.8376, lr_0 = 4.6732e-04
Loss = 8.0805e-02, PNorm = 61.9082, GNorm = 1.1158, lr_0 = 4.6700e-04
Loss = 9.6612e-02, PNorm = 61.9190, GNorm = 0.6670, lr_0 = 4.6668e-04
Loss = 8.5357e-02, PNorm = 61.9305, GNorm = 0.6692, lr_0 = 4.6636e-04
Loss = 8.9230e-02, PNorm = 61.9379, GNorm = 0.7392, lr_0 = 4.6604e-04
Loss = 8.9787e-02, PNorm = 61.9507, GNorm = 1.0480, lr_0 = 4.6572e-04
Loss = 9.2235e-02, PNorm = 61.9690, GNorm = 1.3219, lr_0 = 4.6540e-04
Loss = 8.2750e-02, PNorm = 61.9827, GNorm = 0.5726, lr_0 = 4.6508e-04
Loss = 8.3502e-02, PNorm = 61.9968, GNorm = 0.6016, lr_0 = 4.6476e-04
Loss = 1.0320e-01, PNorm = 62.0138, GNorm = 0.8005, lr_0 = 4.6445e-04
Loss = 1.0303e-01, PNorm = 62.0264, GNorm = 0.8738, lr_0 = 4.6413e-04
Loss = 8.7783e-02, PNorm = 62.0392, GNorm = 0.5786, lr_0 = 4.6381e-04
Loss = 9.9164e-02, PNorm = 62.0460, GNorm = 0.7545, lr_0 = 4.6349e-04
Loss = 9.3989e-02, PNorm = 62.0561, GNorm = 1.2174, lr_0 = 4.6317e-04
Loss = 8.0828e-02, PNorm = 62.0702, GNorm = 0.6364, lr_0 = 4.6286e-04
Loss = 9.4566e-02, PNorm = 62.0820, GNorm = 0.4769, lr_0 = 4.6254e-04
Loss = 9.1615e-02, PNorm = 62.0936, GNorm = 0.7060, lr_0 = 4.6222e-04
Loss = 9.2222e-02, PNorm = 62.1013, GNorm = 0.6664, lr_0 = 4.6191e-04
Loss = 7.2791e-02, PNorm = 62.1089, GNorm = 0.5915, lr_0 = 4.6159e-04
Loss = 6.8144e-02, PNorm = 62.1191, GNorm = 0.4645, lr_0 = 4.6127e-04
Loss = 9.9212e-02, PNorm = 62.1296, GNorm = 0.6835, lr_0 = 4.6096e-04
Loss = 9.4639e-02, PNorm = 62.1400, GNorm = 0.5530, lr_0 = 4.6064e-04
Loss = 8.9185e-02, PNorm = 62.1528, GNorm = 0.7639, lr_0 = 4.6033e-04
Loss = 9.2979e-02, PNorm = 62.1664, GNorm = 1.1070, lr_0 = 4.6001e-04
Loss = 1.0125e-01, PNorm = 62.1822, GNorm = 0.6099, lr_0 = 4.5970e-04
Loss = 8.1046e-02, PNorm = 62.1942, GNorm = 0.9207, lr_0 = 4.5938e-04
Loss = 9.7377e-02, PNorm = 62.2024, GNorm = 0.7469, lr_0 = 4.5907e-04
Loss = 7.9021e-02, PNorm = 62.2114, GNorm = 0.6771, lr_0 = 4.5875e-04
Loss = 7.3928e-02, PNorm = 62.2213, GNorm = 0.4991, lr_0 = 4.5844e-04
Loss = 1.0056e-01, PNorm = 62.2296, GNorm = 0.6776, lr_0 = 4.5812e-04
Loss = 9.7377e-02, PNorm = 62.2376, GNorm = 0.5634, lr_0 = 4.5781e-04
Loss = 8.7274e-02, PNorm = 62.2477, GNorm = 0.5158, lr_0 = 4.5750e-04
Loss = 8.6445e-02, PNorm = 62.2587, GNorm = 1.0160, lr_0 = 4.5718e-04
Loss = 7.7800e-02, PNorm = 62.2711, GNorm = 0.6095, lr_0 = 4.5687e-04
Loss = 8.2642e-02, PNorm = 62.2829, GNorm = 0.4437, lr_0 = 4.5656e-04
Loss = 8.1640e-02, PNorm = 62.2907, GNorm = 0.4692, lr_0 = 4.5624e-04
Loss = 8.2160e-02, PNorm = 62.3044, GNorm = 0.5925, lr_0 = 4.5593e-04
Loss = 8.5446e-02, PNorm = 62.3175, GNorm = 0.4848, lr_0 = 4.5562e-04
Loss = 9.3739e-02, PNorm = 62.3240, GNorm = 0.5672, lr_0 = 4.5531e-04
Loss = 8.3431e-02, PNorm = 62.3334, GNorm = 0.7854, lr_0 = 4.5499e-04
Loss = 9.2650e-02, PNorm = 62.3428, GNorm = 0.8061, lr_0 = 4.5468e-04
Loss = 8.1754e-02, PNorm = 62.3541, GNorm = 0.7605, lr_0 = 4.5437e-04
Loss = 8.0455e-02, PNorm = 62.3698, GNorm = 0.5518, lr_0 = 4.5406e-04
Loss = 8.0653e-02, PNorm = 62.3800, GNorm = 0.4272, lr_0 = 4.5375e-04
Loss = 8.4933e-02, PNorm = 62.3932, GNorm = 0.6227, lr_0 = 4.5344e-04
Loss = 9.9896e-02, PNorm = 62.4081, GNorm = 0.8219, lr_0 = 4.5313e-04
Loss = 8.4057e-02, PNorm = 62.4177, GNorm = 0.5432, lr_0 = 4.5282e-04
Loss = 9.1903e-02, PNorm = 62.4243, GNorm = 0.5809, lr_0 = 4.5251e-04
Loss = 8.8446e-02, PNorm = 62.4339, GNorm = 0.8397, lr_0 = 4.5220e-04
Loss = 9.8192e-02, PNorm = 62.4484, GNorm = 0.5940, lr_0 = 4.5189e-04
Loss = 8.1223e-02, PNorm = 62.4672, GNorm = 0.5946, lr_0 = 4.5158e-04
Loss = 1.0169e-01, PNorm = 62.4767, GNorm = 0.7008, lr_0 = 4.5127e-04
Loss = 8.6333e-02, PNorm = 62.4862, GNorm = 0.6679, lr_0 = 4.5096e-04
Loss = 1.0606e-01, PNorm = 62.4925, GNorm = 0.8649, lr_0 = 4.5065e-04
Loss = 8.4108e-02, PNorm = 62.5009, GNorm = 0.6464, lr_0 = 4.5034e-04
Loss = 8.3092e-02, PNorm = 62.5126, GNorm = 0.4329, lr_0 = 4.5003e-04
Loss = 8.0489e-02, PNorm = 62.5228, GNorm = 1.0004, lr_0 = 4.4972e-04
Loss = 8.9033e-02, PNorm = 62.5334, GNorm = 0.8583, lr_0 = 4.4942e-04
Loss = 8.3033e-02, PNorm = 62.5428, GNorm = 0.4684, lr_0 = 4.4911e-04
Loss = 9.1654e-02, PNorm = 62.5486, GNorm = 0.8187, lr_0 = 4.4880e-04
Loss = 9.8106e-02, PNorm = 62.5538, GNorm = 0.7423, lr_0 = 4.4849e-04
Loss = 7.6801e-02, PNorm = 62.5616, GNorm = 0.4530, lr_0 = 4.4819e-04
Loss = 8.4483e-02, PNorm = 62.5722, GNorm = 0.6211, lr_0 = 4.4788e-04
Loss = 8.6585e-02, PNorm = 62.5844, GNorm = 0.8346, lr_0 = 4.4757e-04
Loss = 8.7641e-02, PNorm = 62.5946, GNorm = 0.7370, lr_0 = 4.4727e-04
Loss = 1.0620e-01, PNorm = 62.6072, GNorm = 0.7252, lr_0 = 4.4696e-04
Loss = 7.9837e-02, PNorm = 62.6182, GNorm = 0.4004, lr_0 = 4.4665e-04
Loss = 7.7769e-02, PNorm = 62.6272, GNorm = 0.6183, lr_0 = 4.4635e-04
Loss = 8.8928e-02, PNorm = 62.6380, GNorm = 0.9029, lr_0 = 4.4604e-04
Loss = 8.6041e-02, PNorm = 62.6497, GNorm = 0.8902, lr_0 = 4.4574e-04
Loss = 1.0056e-01, PNorm = 62.6627, GNorm = 0.7593, lr_0 = 4.4543e-04
Loss = 8.7603e-02, PNorm = 62.6745, GNorm = 0.5493, lr_0 = 4.4513e-04
Loss = 8.5766e-02, PNorm = 62.6864, GNorm = 1.1366, lr_0 = 4.4482e-04
Loss = 8.3076e-02, PNorm = 62.6948, GNorm = 0.6827, lr_0 = 4.4452e-04
Loss = 8.1939e-02, PNorm = 62.7024, GNorm = 0.4390, lr_0 = 4.4421e-04
Loss = 9.0089e-02, PNorm = 62.7094, GNorm = 0.9393, lr_0 = 4.4391e-04
Loss = 9.0305e-02, PNorm = 62.7200, GNorm = 0.5491, lr_0 = 4.4360e-04
Loss = 9.4981e-02, PNorm = 62.7308, GNorm = 0.9725, lr_0 = 4.4330e-04
Loss = 8.8519e-02, PNorm = 62.7382, GNorm = 0.9491, lr_0 = 4.4299e-04
Loss = 8.6590e-02, PNorm = 62.7458, GNorm = 0.5970, lr_0 = 4.4269e-04
Loss = 9.3558e-02, PNorm = 62.7513, GNorm = 1.2333, lr_0 = 4.4239e-04
Loss = 8.0802e-02, PNorm = 62.7574, GNorm = 1.1416, lr_0 = 4.4209e-04
Loss = 8.7068e-02, PNorm = 62.7690, GNorm = 0.7098, lr_0 = 4.4178e-04
Loss = 8.0678e-02, PNorm = 62.7805, GNorm = 0.8816, lr_0 = 4.4148e-04
Loss = 8.6063e-02, PNorm = 62.7904, GNorm = 0.5174, lr_0 = 4.4118e-04
Loss = 8.7079e-02, PNorm = 62.7989, GNorm = 0.6694, lr_0 = 4.4088e-04
Loss = 9.9169e-02, PNorm = 62.8092, GNorm = 0.7965, lr_0 = 4.4057e-04
Loss = 7.2964e-02, PNorm = 62.8210, GNorm = 0.5091, lr_0 = 4.4027e-04
Loss = 8.9949e-02, PNorm = 62.8276, GNorm = 0.5385, lr_0 = 4.3997e-04
Loss = 8.6188e-02, PNorm = 62.8353, GNorm = 0.4781, lr_0 = 4.3967e-04
Loss = 7.9757e-02, PNorm = 62.8427, GNorm = 0.5864, lr_0 = 4.3937e-04
Validation mae = 0.395166
Epoch 12
Loss = 8.0420e-02, PNorm = 62.8484, GNorm = 0.8998, lr_0 = 4.3907e-04
Loss = 7.8229e-02, PNorm = 62.8619, GNorm = 0.9172, lr_0 = 4.3877e-04
Loss = 7.2539e-02, PNorm = 62.8724, GNorm = 0.8356, lr_0 = 4.3846e-04
Loss = 8.4780e-02, PNorm = 62.8822, GNorm = 1.1651, lr_0 = 4.3816e-04
Loss = 7.6910e-02, PNorm = 62.8952, GNorm = 0.5745, lr_0 = 4.3786e-04
Loss = 7.7964e-02, PNorm = 62.9084, GNorm = 1.3163, lr_0 = 4.3756e-04
Loss = 7.5341e-02, PNorm = 62.9132, GNorm = 0.5014, lr_0 = 4.3726e-04
Loss = 8.1422e-02, PNorm = 62.9195, GNorm = 0.8410, lr_0 = 4.3696e-04
Loss = 7.8620e-02, PNorm = 62.9276, GNorm = 0.4695, lr_0 = 4.3667e-04
Loss = 6.6956e-02, PNorm = 62.9360, GNorm = 1.1218, lr_0 = 4.3637e-04
Loss = 7.6697e-02, PNorm = 62.9452, GNorm = 0.6478, lr_0 = 4.3607e-04
Loss = 7.8706e-02, PNorm = 62.9549, GNorm = 0.8684, lr_0 = 4.3577e-04
Loss = 7.3799e-02, PNorm = 62.9626, GNorm = 0.7167, lr_0 = 4.3547e-04
Loss = 7.7399e-02, PNorm = 62.9722, GNorm = 0.6199, lr_0 = 4.3517e-04
Loss = 7.1158e-02, PNorm = 62.9818, GNorm = 0.7341, lr_0 = 4.3487e-04
Loss = 8.2706e-02, PNorm = 62.9915, GNorm = 0.9677, lr_0 = 4.3458e-04
Loss = 7.8375e-02, PNorm = 63.0021, GNorm = 0.5623, lr_0 = 4.3428e-04
Loss = 7.7128e-02, PNorm = 63.0099, GNorm = 0.5932, lr_0 = 4.3398e-04
Loss = 8.7237e-02, PNorm = 63.0161, GNorm = 0.8282, lr_0 = 4.3368e-04
Loss = 8.7293e-02, PNorm = 63.0243, GNorm = 0.8598, lr_0 = 4.3339e-04
Loss = 8.1489e-02, PNorm = 63.0344, GNorm = 0.8592, lr_0 = 4.3309e-04
Loss = 7.4609e-02, PNorm = 63.0472, GNorm = 0.4871, lr_0 = 4.3279e-04
Loss = 7.2437e-02, PNorm = 63.0584, GNorm = 0.5715, lr_0 = 4.3250e-04
Loss = 7.8346e-02, PNorm = 63.0649, GNorm = 0.4313, lr_0 = 4.3220e-04
Loss = 9.3504e-02, PNorm = 63.0710, GNorm = 0.8084, lr_0 = 4.3190e-04
Loss = 7.9258e-02, PNorm = 63.0804, GNorm = 0.5249, lr_0 = 4.3161e-04
Loss = 6.7094e-02, PNorm = 63.0913, GNorm = 0.4496, lr_0 = 4.3131e-04
Loss = 8.5684e-02, PNorm = 63.1020, GNorm = 0.6903, lr_0 = 4.3102e-04
Loss = 8.0680e-02, PNorm = 63.1114, GNorm = 0.5244, lr_0 = 4.3072e-04
Loss = 7.1450e-02, PNorm = 63.1227, GNorm = 0.5245, lr_0 = 4.3043e-04
Loss = 7.5908e-02, PNorm = 63.1356, GNorm = 0.7144, lr_0 = 4.3013e-04
Loss = 8.1463e-02, PNorm = 63.1444, GNorm = 0.4698, lr_0 = 4.2984e-04
Loss = 6.7816e-02, PNorm = 63.1498, GNorm = 0.5563, lr_0 = 4.2954e-04
Loss = 7.2685e-02, PNorm = 63.1578, GNorm = 0.4929, lr_0 = 4.2925e-04
Loss = 9.0186e-02, PNorm = 63.1640, GNorm = 1.2060, lr_0 = 4.2895e-04
Loss = 7.5072e-02, PNorm = 63.1700, GNorm = 1.1110, lr_0 = 4.2866e-04
Loss = 8.2466e-02, PNorm = 63.1845, GNorm = 0.5791, lr_0 = 4.2837e-04
Loss = 7.7588e-02, PNorm = 63.1974, GNorm = 0.6172, lr_0 = 4.2807e-04
Loss = 8.1445e-02, PNorm = 63.2076, GNorm = 0.6277, lr_0 = 4.2778e-04
Loss = 7.7127e-02, PNorm = 63.2204, GNorm = 0.6434, lr_0 = 4.2749e-04
Loss = 8.1998e-02, PNorm = 63.2348, GNorm = 0.4982, lr_0 = 4.2719e-04
Loss = 8.5949e-02, PNorm = 63.2475, GNorm = 0.5668, lr_0 = 4.2690e-04
Loss = 8.4993e-02, PNorm = 63.2591, GNorm = 0.5919, lr_0 = 4.2661e-04
Loss = 7.7873e-02, PNorm = 63.2717, GNorm = 0.8468, lr_0 = 4.2632e-04
Loss = 7.9726e-02, PNorm = 63.2798, GNorm = 0.5452, lr_0 = 4.2602e-04
Loss = 7.6829e-02, PNorm = 63.2916, GNorm = 0.6022, lr_0 = 4.2573e-04
Loss = 7.2787e-02, PNorm = 63.3042, GNorm = 0.6824, lr_0 = 4.2544e-04
Loss = 8.6162e-02, PNorm = 63.3128, GNorm = 0.4697, lr_0 = 4.2515e-04
Loss = 8.2998e-02, PNorm = 63.3211, GNorm = 0.5547, lr_0 = 4.2486e-04
Loss = 7.7664e-02, PNorm = 63.3276, GNorm = 0.5769, lr_0 = 4.2457e-04
Loss = 7.7381e-02, PNorm = 63.3353, GNorm = 0.5040, lr_0 = 4.2428e-04
Loss = 7.6691e-02, PNorm = 63.3447, GNorm = 1.2370, lr_0 = 4.2399e-04
Loss = 6.6819e-02, PNorm = 63.3550, GNorm = 0.6965, lr_0 = 4.2370e-04
Loss = 8.8023e-02, PNorm = 63.3671, GNorm = 0.5703, lr_0 = 4.2340e-04
Loss = 7.9962e-02, PNorm = 63.3758, GNorm = 0.5715, lr_0 = 4.2311e-04
Loss = 8.0603e-02, PNorm = 63.3869, GNorm = 0.6247, lr_0 = 4.2283e-04
Loss = 7.4084e-02, PNorm = 63.3972, GNorm = 0.9255, lr_0 = 4.2254e-04
Loss = 8.1378e-02, PNorm = 63.4027, GNorm = 0.6160, lr_0 = 4.2225e-04
Loss = 7.7853e-02, PNorm = 63.4118, GNorm = 1.2114, lr_0 = 4.2196e-04
Loss = 8.1403e-02, PNorm = 63.4260, GNorm = 0.7349, lr_0 = 4.2167e-04
Loss = 7.0539e-02, PNorm = 63.4371, GNorm = 0.4314, lr_0 = 4.2138e-04
Loss = 7.7909e-02, PNorm = 63.4487, GNorm = 0.6076, lr_0 = 4.2109e-04
Loss = 7.5553e-02, PNorm = 63.4592, GNorm = 1.0385, lr_0 = 4.2080e-04
Loss = 8.9249e-02, PNorm = 63.4709, GNorm = 1.0257, lr_0 = 4.2051e-04
Loss = 7.7130e-02, PNorm = 63.4778, GNorm = 0.6365, lr_0 = 4.2023e-04
Loss = 7.7942e-02, PNorm = 63.4913, GNorm = 0.5062, lr_0 = 4.1994e-04
Loss = 9.4592e-02, PNorm = 63.5041, GNorm = 0.4956, lr_0 = 4.1965e-04
Loss = 9.1731e-02, PNorm = 63.5145, GNorm = 0.6071, lr_0 = 4.1936e-04
Loss = 8.2768e-02, PNorm = 63.5216, GNorm = 0.6929, lr_0 = 4.1907e-04
Loss = 8.4042e-02, PNorm = 63.5321, GNorm = 0.4979, lr_0 = 4.1879e-04
Loss = 8.8619e-02, PNorm = 63.5455, GNorm = 0.7032, lr_0 = 4.1850e-04
Loss = 8.7633e-02, PNorm = 63.5536, GNorm = 0.6360, lr_0 = 4.1821e-04
Loss = 7.7224e-02, PNorm = 63.5601, GNorm = 0.6214, lr_0 = 4.1793e-04
Loss = 8.8452e-02, PNorm = 63.5693, GNorm = 0.6779, lr_0 = 4.1764e-04
Loss = 8.2035e-02, PNorm = 63.5792, GNorm = 0.4343, lr_0 = 4.1736e-04
Loss = 7.3247e-02, PNorm = 63.5892, GNorm = 0.9109, lr_0 = 4.1707e-04
Loss = 8.7393e-02, PNorm = 63.5960, GNorm = 0.7148, lr_0 = 4.1678e-04
Loss = 7.9154e-02, PNorm = 63.6064, GNorm = 0.9477, lr_0 = 4.1650e-04
Loss = 9.1117e-02, PNorm = 63.6185, GNorm = 1.1202, lr_0 = 4.1621e-04
Loss = 8.4844e-02, PNorm = 63.6256, GNorm = 0.5441, lr_0 = 4.1593e-04
Loss = 6.9373e-02, PNorm = 63.6327, GNorm = 0.8474, lr_0 = 4.1564e-04
Loss = 7.3346e-02, PNorm = 63.6379, GNorm = 0.5565, lr_0 = 4.1536e-04
Loss = 8.8882e-02, PNorm = 63.6408, GNorm = 0.6951, lr_0 = 4.1507e-04
Loss = 8.5512e-02, PNorm = 63.6508, GNorm = 0.6357, lr_0 = 4.1479e-04
Loss = 7.0193e-02, PNorm = 63.6594, GNorm = 0.4913, lr_0 = 4.1450e-04
Loss = 8.0830e-02, PNorm = 63.6668, GNorm = 0.6840, lr_0 = 4.1422e-04
Loss = 7.6669e-02, PNorm = 63.6732, GNorm = 0.7938, lr_0 = 4.1394e-04
Loss = 7.8427e-02, PNorm = 63.6838, GNorm = 0.5207, lr_0 = 4.1365e-04
Loss = 7.7773e-02, PNorm = 63.6959, GNorm = 0.5551, lr_0 = 4.1337e-04
Loss = 7.7772e-02, PNorm = 63.7073, GNorm = 0.7051, lr_0 = 4.1309e-04
Loss = 9.5587e-02, PNorm = 63.7197, GNorm = 0.6481, lr_0 = 4.1280e-04
Loss = 8.4653e-02, PNorm = 63.7313, GNorm = 0.5106, lr_0 = 4.1252e-04
Loss = 8.8696e-02, PNorm = 63.7381, GNorm = 0.9678, lr_0 = 4.1224e-04
Loss = 1.0209e-01, PNorm = 63.7465, GNorm = 0.6714, lr_0 = 4.1196e-04
Loss = 8.9768e-02, PNorm = 63.7610, GNorm = 0.5972, lr_0 = 4.1167e-04
Loss = 8.0218e-02, PNorm = 63.7723, GNorm = 0.8415, lr_0 = 4.1139e-04
Loss = 8.7116e-02, PNorm = 63.7798, GNorm = 0.7427, lr_0 = 4.1111e-04
Loss = 9.1381e-02, PNorm = 63.7877, GNorm = 0.8124, lr_0 = 4.1083e-04
Loss = 9.9024e-02, PNorm = 63.7998, GNorm = 1.1120, lr_0 = 4.1055e-04
Loss = 8.1609e-02, PNorm = 63.8139, GNorm = 0.5231, lr_0 = 4.1027e-04
Loss = 8.6010e-02, PNorm = 63.8230, GNorm = 0.5783, lr_0 = 4.0998e-04
Loss = 7.5840e-02, PNorm = 63.8301, GNorm = 0.5713, lr_0 = 4.0970e-04
Loss = 7.8574e-02, PNorm = 63.8377, GNorm = 0.9489, lr_0 = 4.0942e-04
Loss = 8.2429e-02, PNorm = 63.8486, GNorm = 0.5729, lr_0 = 4.0914e-04
Loss = 8.9965e-02, PNorm = 63.8542, GNorm = 0.5874, lr_0 = 4.0886e-04
Loss = 8.2641e-02, PNorm = 63.8593, GNorm = 0.6006, lr_0 = 4.0858e-04
Loss = 9.2938e-02, PNorm = 63.8700, GNorm = 0.5394, lr_0 = 4.0830e-04
Loss = 8.0954e-02, PNorm = 63.8782, GNorm = 0.6958, lr_0 = 4.0802e-04
Loss = 9.5105e-02, PNorm = 63.8855, GNorm = 0.5142, lr_0 = 4.0774e-04
Loss = 8.6980e-02, PNorm = 63.8953, GNorm = 1.0789, lr_0 = 4.0746e-04
Loss = 7.4712e-02, PNorm = 63.9015, GNorm = 0.5582, lr_0 = 4.0718e-04
Loss = 8.5858e-02, PNorm = 63.9138, GNorm = 0.5578, lr_0 = 4.0691e-04
Loss = 7.5699e-02, PNorm = 63.9238, GNorm = 0.7720, lr_0 = 4.0663e-04
Loss = 7.8851e-02, PNorm = 63.9338, GNorm = 0.4339, lr_0 = 4.0635e-04
Loss = 8.3019e-02, PNorm = 63.9430, GNorm = 0.6133, lr_0 = 4.0607e-04
Loss = 8.0427e-02, PNorm = 63.9515, GNorm = 0.7989, lr_0 = 4.0579e-04
Loss = 8.4949e-02, PNorm = 63.9558, GNorm = 0.4615, lr_0 = 4.0551e-04
Loss = 8.0322e-02, PNorm = 63.9627, GNorm = 0.5620, lr_0 = 4.0524e-04
Loss = 7.6269e-02, PNorm = 63.9681, GNorm = 0.6828, lr_0 = 4.0496e-04
Loss = 9.2304e-02, PNorm = 63.9752, GNorm = 0.4905, lr_0 = 4.0468e-04
Validation mae = 0.392549
Epoch 13
Loss = 7.9462e-02, PNorm = 63.9850, GNorm = 0.5101, lr_0 = 4.0440e-04
Loss = 7.1550e-02, PNorm = 63.9949, GNorm = 0.7042, lr_0 = 4.0413e-04
Loss = 7.3650e-02, PNorm = 64.0066, GNorm = 0.4855, lr_0 = 4.0385e-04
Loss = 6.5418e-02, PNorm = 64.0181, GNorm = 0.5559, lr_0 = 4.0357e-04
Loss = 7.3032e-02, PNorm = 64.0227, GNorm = 0.7602, lr_0 = 4.0330e-04
Loss = 7.4984e-02, PNorm = 64.0289, GNorm = 0.5576, lr_0 = 4.0302e-04
Loss = 7.2650e-02, PNorm = 64.0359, GNorm = 0.8269, lr_0 = 4.0274e-04
Loss = 6.4821e-02, PNorm = 64.0466, GNorm = 0.5993, lr_0 = 4.0247e-04
Loss = 6.6953e-02, PNorm = 64.0536, GNorm = 0.5253, lr_0 = 4.0219e-04
Loss = 6.2602e-02, PNorm = 64.0589, GNorm = 0.6005, lr_0 = 4.0192e-04
Loss = 7.5157e-02, PNorm = 64.0665, GNorm = 0.8273, lr_0 = 4.0164e-04
Loss = 8.4105e-02, PNorm = 64.0794, GNorm = 0.5997, lr_0 = 4.0137e-04
Loss = 7.0431e-02, PNorm = 64.0914, GNorm = 0.8017, lr_0 = 4.0109e-04
Loss = 7.7564e-02, PNorm = 64.1066, GNorm = 0.6545, lr_0 = 4.0082e-04
Loss = 6.1811e-02, PNorm = 64.1153, GNorm = 0.8522, lr_0 = 4.0054e-04
Loss = 7.7182e-02, PNorm = 64.1203, GNorm = 0.8245, lr_0 = 4.0027e-04
Loss = 9.5002e-02, PNorm = 64.1253, GNorm = 0.6817, lr_0 = 3.9999e-04
Loss = 8.4520e-02, PNorm = 64.1366, GNorm = 0.6643, lr_0 = 3.9972e-04
Loss = 8.4465e-02, PNorm = 64.1487, GNorm = 0.5464, lr_0 = 3.9945e-04
Loss = 8.8663e-02, PNorm = 64.1598, GNorm = 1.1847, lr_0 = 3.9917e-04
Loss = 6.3369e-02, PNorm = 64.1684, GNorm = 0.5253, lr_0 = 3.9890e-04
Loss = 7.5606e-02, PNorm = 64.1754, GNorm = 0.4557, lr_0 = 3.9863e-04
Loss = 6.9006e-02, PNorm = 64.1797, GNorm = 0.5541, lr_0 = 3.9835e-04
Loss = 7.1565e-02, PNorm = 64.1874, GNorm = 0.6768, lr_0 = 3.9808e-04
Loss = 7.2045e-02, PNorm = 64.1920, GNorm = 0.4218, lr_0 = 3.9781e-04
Loss = 7.3880e-02, PNorm = 64.1997, GNorm = 0.7183, lr_0 = 3.9753e-04
Loss = 7.6457e-02, PNorm = 64.2096, GNorm = 0.9844, lr_0 = 3.9726e-04
Loss = 7.7102e-02, PNorm = 64.2186, GNorm = 0.4325, lr_0 = 3.9699e-04
Loss = 8.4207e-02, PNorm = 64.2308, GNorm = 0.9835, lr_0 = 3.9672e-04
Loss = 8.4157e-02, PNorm = 64.2431, GNorm = 0.5138, lr_0 = 3.9645e-04
Loss = 8.3975e-02, PNorm = 64.2554, GNorm = 0.8014, lr_0 = 3.9617e-04
Loss = 6.7666e-02, PNorm = 64.2668, GNorm = 0.6608, lr_0 = 3.9590e-04
Loss = 6.6152e-02, PNorm = 64.2731, GNorm = 0.5001, lr_0 = 3.9563e-04
Loss = 6.9313e-02, PNorm = 64.2784, GNorm = 0.7924, lr_0 = 3.9536e-04
Loss = 7.0095e-02, PNorm = 64.2874, GNorm = 0.4966, lr_0 = 3.9509e-04
Loss = 6.7186e-02, PNorm = 64.2967, GNorm = 0.5699, lr_0 = 3.9482e-04
Loss = 7.8837e-02, PNorm = 64.3018, GNorm = 0.7919, lr_0 = 3.9455e-04
Loss = 7.4556e-02, PNorm = 64.3052, GNorm = 0.4332, lr_0 = 3.9428e-04
Loss = 6.6172e-02, PNorm = 64.3105, GNorm = 0.5966, lr_0 = 3.9401e-04
Loss = 8.2982e-02, PNorm = 64.3169, GNorm = 0.6407, lr_0 = 3.9374e-04
Loss = 6.4746e-02, PNorm = 64.3261, GNorm = 0.6689, lr_0 = 3.9347e-04
Loss = 8.6518e-02, PNorm = 64.3379, GNorm = 0.6096, lr_0 = 3.9320e-04
Loss = 7.6187e-02, PNorm = 64.3476, GNorm = 0.6074, lr_0 = 3.9293e-04
Loss = 7.1139e-02, PNorm = 64.3564, GNorm = 0.5359, lr_0 = 3.9266e-04
Loss = 6.8304e-02, PNorm = 64.3650, GNorm = 0.4676, lr_0 = 3.9239e-04
Loss = 7.0067e-02, PNorm = 64.3727, GNorm = 0.8053, lr_0 = 3.9212e-04
Loss = 6.4242e-02, PNorm = 64.3819, GNorm = 0.4985, lr_0 = 3.9185e-04
Loss = 7.0220e-02, PNorm = 64.3888, GNorm = 0.5442, lr_0 = 3.9159e-04
Loss = 7.2878e-02, PNorm = 64.3944, GNorm = 0.7778, lr_0 = 3.9132e-04
Loss = 8.0228e-02, PNorm = 64.3999, GNorm = 0.5094, lr_0 = 3.9105e-04
Loss = 8.1543e-02, PNorm = 64.4068, GNorm = 0.6697, lr_0 = 3.9078e-04
Loss = 7.7462e-02, PNorm = 64.4158, GNorm = 0.6481, lr_0 = 3.9051e-04
Loss = 8.1123e-02, PNorm = 64.4226, GNorm = 0.5951, lr_0 = 3.9025e-04
Loss = 6.7877e-02, PNorm = 64.4279, GNorm = 0.5827, lr_0 = 3.8998e-04
Loss = 7.4480e-02, PNorm = 64.4364, GNorm = 0.5689, lr_0 = 3.8971e-04
Loss = 7.7916e-02, PNorm = 64.4478, GNorm = 0.7297, lr_0 = 3.8945e-04
Loss = 7.8162e-02, PNorm = 64.4563, GNorm = 0.5828, lr_0 = 3.8918e-04
Loss = 7.8576e-02, PNorm = 64.4626, GNorm = 0.5689, lr_0 = 3.8891e-04
Loss = 7.1562e-02, PNorm = 64.4703, GNorm = 0.5564, lr_0 = 3.8865e-04
Loss = 6.8257e-02, PNorm = 64.4793, GNorm = 0.9545, lr_0 = 3.8838e-04
Loss = 7.8749e-02, PNorm = 64.4890, GNorm = 0.9417, lr_0 = 3.8811e-04
Loss = 6.5966e-02, PNorm = 64.4976, GNorm = 0.8142, lr_0 = 3.8785e-04
Loss = 7.5562e-02, PNorm = 64.5056, GNorm = 0.7149, lr_0 = 3.8758e-04
Loss = 7.1729e-02, PNorm = 64.5139, GNorm = 0.4839, lr_0 = 3.8732e-04
Loss = 8.6580e-02, PNorm = 64.5232, GNorm = 0.6900, lr_0 = 3.8705e-04
Loss = 7.4750e-02, PNorm = 64.5327, GNorm = 0.5777, lr_0 = 3.8679e-04
Loss = 7.7464e-02, PNorm = 64.5389, GNorm = 0.6030, lr_0 = 3.8652e-04
Loss = 7.2392e-02, PNorm = 64.5474, GNorm = 1.3661, lr_0 = 3.8626e-04
Loss = 8.0409e-02, PNorm = 64.5567, GNorm = 1.0328, lr_0 = 3.8599e-04
Loss = 8.2756e-02, PNorm = 64.5636, GNorm = 0.9701, lr_0 = 3.8573e-04
Loss = 7.5322e-02, PNorm = 64.5749, GNorm = 0.5556, lr_0 = 3.8546e-04
Loss = 8.0076e-02, PNorm = 64.5878, GNorm = 0.5559, lr_0 = 3.8520e-04
Loss = 7.7422e-02, PNorm = 64.5975, GNorm = 0.6929, lr_0 = 3.8493e-04
Loss = 8.2305e-02, PNorm = 64.6067, GNorm = 0.7400, lr_0 = 3.8467e-04
Loss = 7.0020e-02, PNorm = 64.6135, GNorm = 0.5178, lr_0 = 3.8441e-04
Loss = 7.2584e-02, PNorm = 64.6200, GNorm = 0.6836, lr_0 = 3.8414e-04
Loss = 7.9465e-02, PNorm = 64.6272, GNorm = 0.7303, lr_0 = 3.8388e-04
Loss = 7.7295e-02, PNorm = 64.6307, GNorm = 0.6792, lr_0 = 3.8362e-04
Loss = 7.6592e-02, PNorm = 64.6355, GNorm = 1.0701, lr_0 = 3.8336e-04
Loss = 8.5005e-02, PNorm = 64.6439, GNorm = 0.5141, lr_0 = 3.8309e-04
Loss = 6.7040e-02, PNorm = 64.6505, GNorm = 0.4891, lr_0 = 3.8283e-04
Loss = 7.0911e-02, PNorm = 64.6613, GNorm = 0.6428, lr_0 = 3.8257e-04
Loss = 9.2706e-02, PNorm = 64.6733, GNorm = 0.6187, lr_0 = 3.8231e-04
Loss = 7.1299e-02, PNorm = 64.6829, GNorm = 0.6538, lr_0 = 3.8204e-04
Loss = 7.5034e-02, PNorm = 64.6893, GNorm = 0.5488, lr_0 = 3.8178e-04
Loss = 7.9668e-02, PNorm = 64.6966, GNorm = 0.7181, lr_0 = 3.8152e-04
Loss = 6.9052e-02, PNorm = 64.7047, GNorm = 0.7445, lr_0 = 3.8126e-04
Loss = 7.9732e-02, PNorm = 64.7122, GNorm = 0.7541, lr_0 = 3.8100e-04
Loss = 7.4860e-02, PNorm = 64.7194, GNorm = 0.5083, lr_0 = 3.8074e-04
Loss = 6.9330e-02, PNorm = 64.7288, GNorm = 0.4493, lr_0 = 3.8048e-04
Loss = 8.4135e-02, PNorm = 64.7392, GNorm = 1.1013, lr_0 = 3.8022e-04
Loss = 8.2771e-02, PNorm = 64.7499, GNorm = 0.9577, lr_0 = 3.7995e-04
Loss = 8.4687e-02, PNorm = 64.7587, GNorm = 0.8979, lr_0 = 3.7969e-04
Loss = 7.2597e-02, PNorm = 64.7735, GNorm = 0.8005, lr_0 = 3.7943e-04
Loss = 7.9514e-02, PNorm = 64.7842, GNorm = 0.4136, lr_0 = 3.7917e-04
Loss = 8.6437e-02, PNorm = 64.7942, GNorm = 0.5838, lr_0 = 3.7891e-04
Loss = 7.6024e-02, PNorm = 64.8033, GNorm = 0.5964, lr_0 = 3.7866e-04
Loss = 7.7304e-02, PNorm = 64.8134, GNorm = 0.6257, lr_0 = 3.7840e-04
Loss = 7.3928e-02, PNorm = 64.8216, GNorm = 0.7807, lr_0 = 3.7814e-04
Loss = 7.3574e-02, PNorm = 64.8306, GNorm = 0.7769, lr_0 = 3.7788e-04
Loss = 7.1682e-02, PNorm = 64.8402, GNorm = 0.5249, lr_0 = 3.7762e-04
Loss = 7.2641e-02, PNorm = 64.8481, GNorm = 0.5528, lr_0 = 3.7736e-04
Loss = 6.5553e-02, PNorm = 64.8544, GNorm = 0.3912, lr_0 = 3.7710e-04
Loss = 7.5896e-02, PNorm = 64.8613, GNorm = 0.5200, lr_0 = 3.7684e-04
Loss = 7.0160e-02, PNorm = 64.8668, GNorm = 0.8373, lr_0 = 3.7659e-04
Loss = 7.4423e-02, PNorm = 64.8781, GNorm = 0.6523, lr_0 = 3.7633e-04
Loss = 6.8484e-02, PNorm = 64.8874, GNorm = 0.5409, lr_0 = 3.7607e-04
Loss = 6.8492e-02, PNorm = 64.8929, GNorm = 0.5603, lr_0 = 3.7581e-04
Loss = 6.9310e-02, PNorm = 64.8953, GNorm = 0.5434, lr_0 = 3.7555e-04
Loss = 9.2497e-02, PNorm = 64.8986, GNorm = 1.1688, lr_0 = 3.7530e-04
Loss = 8.3957e-02, PNorm = 64.9036, GNorm = 0.6399, lr_0 = 3.7504e-04
Loss = 8.7971e-02, PNorm = 64.9101, GNorm = 0.8002, lr_0 = 3.7478e-04
Loss = 8.4593e-02, PNorm = 64.9190, GNorm = 0.8682, lr_0 = 3.7453e-04
Loss = 8.0515e-02, PNorm = 64.9292, GNorm = 0.5848, lr_0 = 3.7427e-04
Loss = 7.1248e-02, PNorm = 64.9367, GNorm = 0.7518, lr_0 = 3.7401e-04
Loss = 7.8077e-02, PNorm = 64.9441, GNorm = 0.5858, lr_0 = 3.7376e-04
Loss = 7.7900e-02, PNorm = 64.9522, GNorm = 0.6956, lr_0 = 3.7350e-04
Loss = 7.6650e-02, PNorm = 64.9599, GNorm = 0.6323, lr_0 = 3.7325e-04
Loss = 9.2352e-02, PNorm = 64.9678, GNorm = 0.7627, lr_0 = 3.7299e-04
Loss = 8.2105e-02, PNorm = 64.9745, GNorm = 0.7047, lr_0 = 3.7273e-04
Validation mae = 0.393054
Epoch 14
Loss = 6.2189e-02, PNorm = 64.9816, GNorm = 0.5548, lr_0 = 3.7248e-04
Loss = 6.5932e-02, PNorm = 64.9899, GNorm = 0.4486, lr_0 = 3.7222e-04
Loss = 7.1703e-02, PNorm = 65.0033, GNorm = 0.5344, lr_0 = 3.7197e-04
Loss = 6.0784e-02, PNorm = 65.0108, GNorm = 0.5069, lr_0 = 3.7171e-04
Loss = 6.7657e-02, PNorm = 65.0147, GNorm = 0.9641, lr_0 = 3.7146e-04
Loss = 6.7321e-02, PNorm = 65.0212, GNorm = 0.8500, lr_0 = 3.7120e-04
Loss = 7.9806e-02, PNorm = 65.0303, GNorm = 0.9380, lr_0 = 3.7095e-04
Loss = 6.7840e-02, PNorm = 65.0393, GNorm = 1.1637, lr_0 = 3.7070e-04
Loss = 6.7099e-02, PNorm = 65.0489, GNorm = 0.5583, lr_0 = 3.7044e-04
Loss = 7.3908e-02, PNorm = 65.0560, GNorm = 0.6018, lr_0 = 3.7019e-04
Loss = 6.2174e-02, PNorm = 65.0612, GNorm = 0.4622, lr_0 = 3.6993e-04
Loss = 6.6508e-02, PNorm = 65.0679, GNorm = 0.5401, lr_0 = 3.6968e-04
Loss = 7.0374e-02, PNorm = 65.0753, GNorm = 0.6704, lr_0 = 3.6943e-04
Loss = 7.6170e-02, PNorm = 65.0854, GNorm = 0.6334, lr_0 = 3.6917e-04
Loss = 7.2756e-02, PNorm = 65.0944, GNorm = 0.8386, lr_0 = 3.6892e-04
Loss = 6.0083e-02, PNorm = 65.1033, GNorm = 0.5455, lr_0 = 3.6867e-04
Loss = 6.6554e-02, PNorm = 65.1109, GNorm = 0.4082, lr_0 = 3.6842e-04
Loss = 7.3394e-02, PNorm = 65.1180, GNorm = 0.7333, lr_0 = 3.6816e-04
Loss = 6.5347e-02, PNorm = 65.1269, GNorm = 0.7184, lr_0 = 3.6791e-04
Loss = 7.0258e-02, PNorm = 65.1361, GNorm = 0.8714, lr_0 = 3.6766e-04
Loss = 5.4871e-02, PNorm = 65.1419, GNorm = 0.6573, lr_0 = 3.6741e-04
Loss = 6.5485e-02, PNorm = 65.1489, GNorm = 0.5236, lr_0 = 3.6716e-04
Loss = 6.6688e-02, PNorm = 65.1586, GNorm = 0.4956, lr_0 = 3.6690e-04
Loss = 6.6973e-02, PNorm = 65.1682, GNorm = 0.4613, lr_0 = 3.6665e-04
Loss = 6.5837e-02, PNorm = 65.1761, GNorm = 0.6199, lr_0 = 3.6640e-04
Loss = 6.5707e-02, PNorm = 65.1828, GNorm = 0.4900, lr_0 = 3.6615e-04
Loss = 7.6833e-02, PNorm = 65.1937, GNorm = 0.8994, lr_0 = 3.6590e-04
Loss = 7.0549e-02, PNorm = 65.2006, GNorm = 0.5518, lr_0 = 3.6565e-04
Loss = 8.1527e-02, PNorm = 65.2096, GNorm = 1.2035, lr_0 = 3.6540e-04
Loss = 7.1536e-02, PNorm = 65.2187, GNorm = 0.7384, lr_0 = 3.6515e-04
Loss = 8.4355e-02, PNorm = 65.2304, GNorm = 0.9205, lr_0 = 3.6490e-04
Loss = 7.4445e-02, PNorm = 65.2397, GNorm = 0.5237, lr_0 = 3.6465e-04
Loss = 6.4570e-02, PNorm = 65.2480, GNorm = 0.5274, lr_0 = 3.6440e-04
Loss = 6.4488e-02, PNorm = 65.2560, GNorm = 0.7298, lr_0 = 3.6415e-04
Loss = 6.4495e-02, PNorm = 65.2640, GNorm = 0.6789, lr_0 = 3.6390e-04
Loss = 7.0748e-02, PNorm = 65.2714, GNorm = 0.5295, lr_0 = 3.6365e-04
Loss = 5.9819e-02, PNorm = 65.2771, GNorm = 0.5642, lr_0 = 3.6340e-04
Loss = 7.5942e-02, PNorm = 65.2884, GNorm = 0.8859, lr_0 = 3.6315e-04
Loss = 7.5423e-02, PNorm = 65.3001, GNorm = 0.6708, lr_0 = 3.6290e-04
Loss = 7.7822e-02, PNorm = 65.3091, GNorm = 1.2329, lr_0 = 3.6266e-04
Loss = 6.1868e-02, PNorm = 65.3154, GNorm = 0.7351, lr_0 = 3.6241e-04
Loss = 6.3976e-02, PNorm = 65.3222, GNorm = 0.9981, lr_0 = 3.6216e-04
Loss = 6.3960e-02, PNorm = 65.3285, GNorm = 0.5863, lr_0 = 3.6191e-04
Loss = 6.4536e-02, PNorm = 65.3352, GNorm = 0.5757, lr_0 = 3.6166e-04
Loss = 6.0137e-02, PNorm = 65.3418, GNorm = 0.6923, lr_0 = 3.6141e-04
Loss = 7.2723e-02, PNorm = 65.3465, GNorm = 0.6792, lr_0 = 3.6117e-04
Loss = 6.7115e-02, PNorm = 65.3551, GNorm = 0.5150, lr_0 = 3.6092e-04
Loss = 7.0615e-02, PNorm = 65.3626, GNorm = 0.6660, lr_0 = 3.6067e-04
Loss = 6.2971e-02, PNorm = 65.3702, GNorm = 0.5415, lr_0 = 3.6043e-04
Loss = 6.5497e-02, PNorm = 65.3796, GNorm = 0.6521, lr_0 = 3.6018e-04
Loss = 6.7746e-02, PNorm = 65.3887, GNorm = 0.6335, lr_0 = 3.5993e-04
Loss = 7.5998e-02, PNorm = 65.3929, GNorm = 0.4690, lr_0 = 3.5969e-04
Loss = 6.9706e-02, PNorm = 65.3992, GNorm = 0.6786, lr_0 = 3.5944e-04
Loss = 7.4524e-02, PNorm = 65.4069, GNorm = 0.8073, lr_0 = 3.5919e-04
Loss = 6.0503e-02, PNorm = 65.4120, GNorm = 0.4757, lr_0 = 3.5895e-04
Loss = 6.3614e-02, PNorm = 65.4162, GNorm = 0.5241, lr_0 = 3.5870e-04
Loss = 6.5087e-02, PNorm = 65.4203, GNorm = 0.6255, lr_0 = 3.5845e-04
Loss = 6.5750e-02, PNorm = 65.4273, GNorm = 0.6104, lr_0 = 3.5821e-04
Loss = 6.2805e-02, PNorm = 65.4318, GNorm = 0.8693, lr_0 = 3.5796e-04
Loss = 7.3425e-02, PNorm = 65.4381, GNorm = 0.7017, lr_0 = 3.5772e-04
Loss = 7.9466e-02, PNorm = 65.4431, GNorm = 0.5031, lr_0 = 3.5747e-04
Loss = 6.8562e-02, PNorm = 65.4522, GNorm = 0.6062, lr_0 = 3.5723e-04
Loss = 6.7598e-02, PNorm = 65.4608, GNorm = 0.6011, lr_0 = 3.5698e-04
Loss = 7.2747e-02, PNorm = 65.4683, GNorm = 0.6736, lr_0 = 3.5674e-04
Loss = 6.6792e-02, PNorm = 65.4768, GNorm = 0.7149, lr_0 = 3.5650e-04
Loss = 7.1896e-02, PNorm = 65.4859, GNorm = 0.5075, lr_0 = 3.5625e-04
Loss = 7.1412e-02, PNorm = 65.4949, GNorm = 0.7133, lr_0 = 3.5601e-04
Loss = 6.9131e-02, PNorm = 65.5057, GNorm = 0.4981, lr_0 = 3.5576e-04
Loss = 6.9786e-02, PNorm = 65.5108, GNorm = 0.4736, lr_0 = 3.5552e-04
Loss = 6.4889e-02, PNorm = 65.5132, GNorm = 0.8733, lr_0 = 3.5528e-04
Loss = 8.1809e-02, PNorm = 65.5188, GNorm = 0.5043, lr_0 = 3.5503e-04
Loss = 8.4523e-02, PNorm = 65.5272, GNorm = 0.8371, lr_0 = 3.5479e-04
Loss = 7.0173e-02, PNorm = 65.5354, GNorm = 0.6934, lr_0 = 3.5455e-04
Loss = 7.3162e-02, PNorm = 65.5436, GNorm = 0.5460, lr_0 = 3.5430e-04
Loss = 7.3396e-02, PNorm = 65.5524, GNorm = 0.5258, lr_0 = 3.5406e-04
Loss = 8.4210e-02, PNorm = 65.5589, GNorm = 0.6170, lr_0 = 3.5382e-04
Loss = 7.8047e-02, PNorm = 65.5653, GNorm = 0.7797, lr_0 = 3.5358e-04
Loss = 7.5886e-02, PNorm = 65.5717, GNorm = 0.4529, lr_0 = 3.5333e-04
Loss = 6.6463e-02, PNorm = 65.5770, GNorm = 0.5526, lr_0 = 3.5309e-04
Loss = 7.7272e-02, PNorm = 65.5840, GNorm = 0.8095, lr_0 = 3.5285e-04
Loss = 8.1061e-02, PNorm = 65.5924, GNorm = 0.9387, lr_0 = 3.5261e-04
Loss = 6.5639e-02, PNorm = 65.5993, GNorm = 0.9233, lr_0 = 3.5237e-04
Loss = 7.4764e-02, PNorm = 65.6094, GNorm = 0.6849, lr_0 = 3.5212e-04
Loss = 6.6892e-02, PNorm = 65.6146, GNorm = 0.4805, lr_0 = 3.5188e-04
Loss = 7.1658e-02, PNorm = 65.6200, GNorm = 0.7382, lr_0 = 3.5164e-04
Loss = 7.8702e-02, PNorm = 65.6291, GNorm = 0.7011, lr_0 = 3.5140e-04
Loss = 7.1027e-02, PNorm = 65.6393, GNorm = 0.6972, lr_0 = 3.5116e-04
Loss = 6.7144e-02, PNorm = 65.6465, GNorm = 0.6200, lr_0 = 3.5092e-04
Loss = 6.7874e-02, PNorm = 65.6556, GNorm = 0.7095, lr_0 = 3.5068e-04
Loss = 7.4909e-02, PNorm = 65.6646, GNorm = 0.6666, lr_0 = 3.5044e-04
Loss = 7.2526e-02, PNorm = 65.6701, GNorm = 0.5421, lr_0 = 3.5020e-04
Loss = 7.8869e-02, PNorm = 65.6755, GNorm = 0.4248, lr_0 = 3.4996e-04
Loss = 7.4454e-02, PNorm = 65.6850, GNorm = 0.8965, lr_0 = 3.4972e-04
Loss = 6.3858e-02, PNorm = 65.6971, GNorm = 0.5117, lr_0 = 3.4948e-04
Loss = 7.2799e-02, PNorm = 65.7064, GNorm = 0.9268, lr_0 = 3.4924e-04
Loss = 7.7405e-02, PNorm = 65.7163, GNorm = 0.5188, lr_0 = 3.4900e-04
Loss = 7.0826e-02, PNorm = 65.7253, GNorm = 0.7980, lr_0 = 3.4876e-04
Loss = 7.5151e-02, PNorm = 65.7323, GNorm = 0.6716, lr_0 = 3.4852e-04
Loss = 7.4623e-02, PNorm = 65.7406, GNorm = 0.5974, lr_0 = 3.4828e-04
Loss = 6.4274e-02, PNorm = 65.7479, GNorm = 0.8017, lr_0 = 3.4805e-04
Loss = 7.7744e-02, PNorm = 65.7532, GNorm = 0.9662, lr_0 = 3.4781e-04
Loss = 6.5556e-02, PNorm = 65.7593, GNorm = 0.4556, lr_0 = 3.4757e-04
Loss = 6.2856e-02, PNorm = 65.7644, GNorm = 0.6221, lr_0 = 3.4733e-04
Loss = 7.6741e-02, PNorm = 65.7706, GNorm = 0.5639, lr_0 = 3.4709e-04
Loss = 7.8940e-02, PNorm = 65.7772, GNorm = 0.9674, lr_0 = 3.4686e-04
Loss = 7.2065e-02, PNorm = 65.7852, GNorm = 0.4709, lr_0 = 3.4662e-04
Loss = 6.2927e-02, PNorm = 65.7937, GNorm = 0.6723, lr_0 = 3.4638e-04
Loss = 6.8522e-02, PNorm = 65.7987, GNorm = 1.0439, lr_0 = 3.4614e-04
Loss = 6.9472e-02, PNorm = 65.8031, GNorm = 0.6249, lr_0 = 3.4591e-04
Loss = 7.0730e-02, PNorm = 65.8066, GNorm = 0.5532, lr_0 = 3.4567e-04
Loss = 6.5970e-02, PNorm = 65.8129, GNorm = 0.5393, lr_0 = 3.4543e-04
Loss = 7.1479e-02, PNorm = 65.8193, GNorm = 0.5149, lr_0 = 3.4520e-04
Loss = 7.3911e-02, PNorm = 65.8289, GNorm = 0.8373, lr_0 = 3.4496e-04
Loss = 7.0700e-02, PNorm = 65.8365, GNorm = 1.2946, lr_0 = 3.4472e-04
Loss = 6.5726e-02, PNorm = 65.8417, GNorm = 0.6136, lr_0 = 3.4449e-04
Loss = 7.1025e-02, PNorm = 65.8470, GNorm = 0.6264, lr_0 = 3.4425e-04
Loss = 9.1405e-02, PNorm = 65.8518, GNorm = 0.6028, lr_0 = 3.4402e-04
Loss = 6.8549e-02, PNorm = 65.8577, GNorm = 0.6017, lr_0 = 3.4378e-04
Loss = 7.3755e-02, PNorm = 65.8656, GNorm = 0.5815, lr_0 = 3.4354e-04
Loss = 7.1084e-02, PNorm = 65.8737, GNorm = 0.6336, lr_0 = 3.4331e-04
Validation mae = 0.394217
Epoch 15
Loss = 5.6619e-02, PNorm = 65.8810, GNorm = 0.5535, lr_0 = 3.4307e-04
Loss = 6.2025e-02, PNorm = 65.8868, GNorm = 0.7247, lr_0 = 3.4284e-04
Loss = 6.8949e-02, PNorm = 65.8959, GNorm = 0.6553, lr_0 = 3.4260e-04
Loss = 5.8457e-02, PNorm = 65.9052, GNorm = 0.5106, lr_0 = 3.4237e-04
Loss = 5.6421e-02, PNorm = 65.9105, GNorm = 0.5499, lr_0 = 3.4213e-04
Loss = 6.2926e-02, PNorm = 65.9157, GNorm = 0.6839, lr_0 = 3.4190e-04
Loss = 7.1807e-02, PNorm = 65.9224, GNorm = 0.6674, lr_0 = 3.4167e-04
Loss = 6.7714e-02, PNorm = 65.9330, GNorm = 0.4731, lr_0 = 3.4143e-04
Loss = 6.7036e-02, PNorm = 65.9414, GNorm = 0.5602, lr_0 = 3.4120e-04
Loss = 6.2474e-02, PNorm = 65.9483, GNorm = 0.4967, lr_0 = 3.4096e-04
Loss = 6.7515e-02, PNorm = 65.9544, GNorm = 0.5522, lr_0 = 3.4073e-04
Loss = 5.6971e-02, PNorm = 65.9602, GNorm = 0.5096, lr_0 = 3.4050e-04
Loss = 5.9192e-02, PNorm = 65.9688, GNorm = 0.5980, lr_0 = 3.4026e-04
Loss = 6.4286e-02, PNorm = 65.9760, GNorm = 0.7124, lr_0 = 3.4003e-04
Loss = 6.1829e-02, PNorm = 65.9797, GNorm = 0.6778, lr_0 = 3.3980e-04
Loss = 5.4126e-02, PNorm = 65.9844, GNorm = 0.6680, lr_0 = 3.3956e-04
Loss = 5.5153e-02, PNorm = 65.9895, GNorm = 0.4936, lr_0 = 3.3933e-04
Loss = 6.5630e-02, PNorm = 65.9961, GNorm = 1.0464, lr_0 = 3.3910e-04
Loss = 6.4230e-02, PNorm = 66.0034, GNorm = 0.8384, lr_0 = 3.3887e-04
Loss = 6.1241e-02, PNorm = 66.0121, GNorm = 0.5754, lr_0 = 3.3864e-04
Loss = 6.6657e-02, PNorm = 66.0232, GNorm = 0.8260, lr_0 = 3.3840e-04
Loss = 6.6066e-02, PNorm = 66.0305, GNorm = 0.6498, lr_0 = 3.3817e-04
Loss = 6.4603e-02, PNorm = 66.0410, GNorm = 0.5525, lr_0 = 3.3794e-04
Loss = 7.0213e-02, PNorm = 66.0492, GNorm = 0.5219, lr_0 = 3.3771e-04
Loss = 6.5114e-02, PNorm = 66.0544, GNorm = 0.9760, lr_0 = 3.3748e-04
Loss = 6.5015e-02, PNorm = 66.0583, GNorm = 0.7463, lr_0 = 3.3725e-04
Loss = 6.1166e-02, PNorm = 66.0634, GNorm = 0.4342, lr_0 = 3.3701e-04
Loss = 7.3464e-02, PNorm = 66.0713, GNorm = 0.7960, lr_0 = 3.3678e-04
Loss = 6.6785e-02, PNorm = 66.0793, GNorm = 0.5399, lr_0 = 3.3655e-04
Loss = 6.9655e-02, PNorm = 66.0827, GNorm = 0.6918, lr_0 = 3.3632e-04
Loss = 6.7546e-02, PNorm = 66.0871, GNorm = 0.8038, lr_0 = 3.3609e-04
Loss = 7.7138e-02, PNorm = 66.0910, GNorm = 0.9985, lr_0 = 3.3586e-04
Loss = 6.1766e-02, PNorm = 66.0981, GNorm = 0.5069, lr_0 = 3.3563e-04
Loss = 7.2186e-02, PNorm = 66.1092, GNorm = 0.5852, lr_0 = 3.3540e-04
Loss = 8.0624e-02, PNorm = 66.1168, GNorm = 1.0405, lr_0 = 3.3517e-04
Loss = 5.6775e-02, PNorm = 66.1212, GNorm = 0.5476, lr_0 = 3.3494e-04
Loss = 7.2294e-02, PNorm = 66.1291, GNorm = 0.6090, lr_0 = 3.3471e-04
Loss = 5.6825e-02, PNorm = 66.1352, GNorm = 0.4792, lr_0 = 3.3448e-04
Loss = 6.2168e-02, PNorm = 66.1419, GNorm = 1.0007, lr_0 = 3.3425e-04
Loss = 6.4032e-02, PNorm = 66.1497, GNorm = 0.5360, lr_0 = 3.3403e-04
Loss = 7.4223e-02, PNorm = 66.1588, GNorm = 0.6750, lr_0 = 3.3380e-04
Loss = 5.8557e-02, PNorm = 66.1671, GNorm = 0.5643, lr_0 = 3.3357e-04
Loss = 7.8233e-02, PNorm = 66.1743, GNorm = 0.7153, lr_0 = 3.3334e-04
Loss = 6.4228e-02, PNorm = 66.1818, GNorm = 0.6735, lr_0 = 3.3311e-04
Loss = 6.9456e-02, PNorm = 66.1906, GNorm = 0.8899, lr_0 = 3.3288e-04
Loss = 6.9576e-02, PNorm = 66.1961, GNorm = 0.4628, lr_0 = 3.3265e-04
Loss = 8.2039e-02, PNorm = 66.2028, GNorm = 0.9246, lr_0 = 3.3243e-04
Loss = 6.5048e-02, PNorm = 66.2110, GNorm = 0.7623, lr_0 = 3.3220e-04
Loss = 7.1732e-02, PNorm = 66.2211, GNorm = 0.5436, lr_0 = 3.3197e-04
Loss = 5.9807e-02, PNorm = 66.2283, GNorm = 0.5099, lr_0 = 3.3174e-04
Loss = 6.2921e-02, PNorm = 66.2340, GNorm = 1.0264, lr_0 = 3.3152e-04
Loss = 6.3530e-02, PNorm = 66.2394, GNorm = 0.5433, lr_0 = 3.3129e-04
Loss = 6.2874e-02, PNorm = 66.2452, GNorm = 0.8094, lr_0 = 3.3106e-04
Loss = 6.2592e-02, PNorm = 66.2526, GNorm = 0.4726, lr_0 = 3.3084e-04
Loss = 5.5631e-02, PNorm = 66.2608, GNorm = 0.5070, lr_0 = 3.3061e-04
Loss = 6.6814e-02, PNorm = 66.2689, GNorm = 0.7432, lr_0 = 3.3038e-04
Loss = 7.3266e-02, PNorm = 66.2754, GNorm = 0.6005, lr_0 = 3.3016e-04
Loss = 6.7735e-02, PNorm = 66.2818, GNorm = 0.5255, lr_0 = 3.2993e-04
Loss = 6.2478e-02, PNorm = 66.2896, GNorm = 0.4261, lr_0 = 3.2970e-04
Loss = 6.6198e-02, PNorm = 66.2979, GNorm = 0.5788, lr_0 = 3.2948e-04
Loss = 7.0618e-02, PNorm = 66.3063, GNorm = 0.4148, lr_0 = 3.2925e-04
Loss = 6.9870e-02, PNorm = 66.3148, GNorm = 0.6108, lr_0 = 3.2903e-04
Loss = 5.9527e-02, PNorm = 66.3214, GNorm = 0.8285, lr_0 = 3.2880e-04
Loss = 6.8522e-02, PNorm = 66.3275, GNorm = 0.7758, lr_0 = 3.2858e-04
Loss = 7.3936e-02, PNorm = 66.3360, GNorm = 0.7654, lr_0 = 3.2835e-04
Loss = 8.3688e-02, PNorm = 66.3435, GNorm = 0.6676, lr_0 = 3.2813e-04
Loss = 6.6832e-02, PNorm = 66.3530, GNorm = 0.5282, lr_0 = 3.2790e-04
Loss = 6.8317e-02, PNorm = 66.3620, GNorm = 0.6925, lr_0 = 3.2768e-04
Loss = 5.7205e-02, PNorm = 66.3681, GNorm = 0.4678, lr_0 = 3.2745e-04
Loss = 7.3350e-02, PNorm = 66.3767, GNorm = 0.6865, lr_0 = 3.2723e-04
Loss = 6.4615e-02, PNorm = 66.3848, GNorm = 0.5125, lr_0 = 3.2700e-04
Loss = 6.6887e-02, PNorm = 66.3917, GNorm = 0.5035, lr_0 = 3.2678e-04
Loss = 5.8551e-02, PNorm = 66.3979, GNorm = 0.4102, lr_0 = 3.2656e-04
Loss = 6.4754e-02, PNorm = 66.4056, GNorm = 0.7303, lr_0 = 3.2633e-04
Loss = 6.8735e-02, PNorm = 66.4145, GNorm = 0.4733, lr_0 = 3.2611e-04
Loss = 5.8830e-02, PNorm = 66.4228, GNorm = 0.5339, lr_0 = 3.2589e-04
Loss = 6.6267e-02, PNorm = 66.4294, GNorm = 0.4472, lr_0 = 3.2566e-04
Loss = 6.8500e-02, PNorm = 66.4385, GNorm = 0.7259, lr_0 = 3.2544e-04
Loss = 7.1087e-02, PNorm = 66.4481, GNorm = 0.6995, lr_0 = 3.2522e-04
Loss = 6.5883e-02, PNorm = 66.4585, GNorm = 0.6506, lr_0 = 3.2499e-04
Loss = 8.4098e-02, PNorm = 66.4693, GNorm = 1.1741, lr_0 = 3.2477e-04
Loss = 6.7080e-02, PNorm = 66.4782, GNorm = 0.8106, lr_0 = 3.2455e-04
Loss = 6.5117e-02, PNorm = 66.4853, GNorm = 0.5169, lr_0 = 3.2433e-04
Loss = 7.4187e-02, PNorm = 66.4922, GNorm = 0.4766, lr_0 = 3.2410e-04
Loss = 6.6408e-02, PNorm = 66.4988, GNorm = 0.6961, lr_0 = 3.2388e-04
Loss = 6.2880e-02, PNorm = 66.5052, GNorm = 0.6309, lr_0 = 3.2366e-04
Loss = 7.4012e-02, PNorm = 66.5110, GNorm = 0.7318, lr_0 = 3.2344e-04
Loss = 6.5810e-02, PNorm = 66.5168, GNorm = 0.6404, lr_0 = 3.2322e-04
Loss = 5.8170e-02, PNorm = 66.5207, GNorm = 0.4881, lr_0 = 3.2300e-04
Loss = 6.5036e-02, PNorm = 66.5260, GNorm = 0.4622, lr_0 = 3.2277e-04
Loss = 7.3016e-02, PNorm = 66.5325, GNorm = 0.8584, lr_0 = 3.2255e-04
Loss = 7.1255e-02, PNorm = 66.5405, GNorm = 0.3876, lr_0 = 3.2233e-04
Loss = 7.0714e-02, PNorm = 66.5464, GNorm = 0.6870, lr_0 = 3.2211e-04
Loss = 6.6903e-02, PNorm = 66.5504, GNorm = 0.6289, lr_0 = 3.2189e-04
Loss = 6.8933e-02, PNorm = 66.5559, GNorm = 0.5208, lr_0 = 3.2167e-04
Loss = 7.6865e-02, PNorm = 66.5605, GNorm = 0.7568, lr_0 = 3.2145e-04
Loss = 6.7416e-02, PNorm = 66.5660, GNorm = 0.6620, lr_0 = 3.2123e-04
Loss = 6.2413e-02, PNorm = 66.5677, GNorm = 0.5472, lr_0 = 3.2101e-04
Loss = 6.2579e-02, PNorm = 66.5699, GNorm = 0.5761, lr_0 = 3.2079e-04
Loss = 8.9541e-02, PNorm = 66.5766, GNorm = 0.5672, lr_0 = 3.2057e-04
Loss = 7.4132e-02, PNorm = 66.5812, GNorm = 0.5609, lr_0 = 3.2035e-04
Loss = 6.4335e-02, PNorm = 66.5878, GNorm = 0.6159, lr_0 = 3.2013e-04
Loss = 6.9853e-02, PNorm = 66.5952, GNorm = 0.6587, lr_0 = 3.1991e-04
Loss = 6.7772e-02, PNorm = 66.6022, GNorm = 0.7641, lr_0 = 3.1969e-04
Loss = 6.1932e-02, PNorm = 66.6091, GNorm = 0.5660, lr_0 = 3.1947e-04
Loss = 6.2129e-02, PNorm = 66.6138, GNorm = 0.6319, lr_0 = 3.1925e-04
Loss = 6.5133e-02, PNorm = 66.6193, GNorm = 0.6284, lr_0 = 3.1904e-04
Loss = 5.9210e-02, PNorm = 66.6270, GNorm = 0.6560, lr_0 = 3.1882e-04
Loss = 6.7871e-02, PNorm = 66.6297, GNorm = 0.7361, lr_0 = 3.1860e-04
Loss = 6.1371e-02, PNorm = 66.6340, GNorm = 0.5570, lr_0 = 3.1838e-04
Loss = 6.2401e-02, PNorm = 66.6419, GNorm = 0.5980, lr_0 = 3.1816e-04
Loss = 6.6668e-02, PNorm = 66.6526, GNorm = 0.8770, lr_0 = 3.1794e-04
Loss = 6.3536e-02, PNorm = 66.6604, GNorm = 0.3692, lr_0 = 3.1773e-04
Loss = 6.9091e-02, PNorm = 66.6671, GNorm = 0.6814, lr_0 = 3.1751e-04
Loss = 6.6613e-02, PNorm = 66.6710, GNorm = 0.6300, lr_0 = 3.1729e-04
Loss = 6.4526e-02, PNorm = 66.6746, GNorm = 0.6085, lr_0 = 3.1707e-04
Loss = 6.1308e-02, PNorm = 66.6788, GNorm = 0.4779, lr_0 = 3.1686e-04
Loss = 6.4411e-02, PNorm = 66.6821, GNorm = 0.7617, lr_0 = 3.1664e-04
Loss = 6.5500e-02, PNorm = 66.6876, GNorm = 1.5186, lr_0 = 3.1642e-04
Loss = 5.8781e-02, PNorm = 66.6920, GNorm = 0.4639, lr_0 = 3.1621e-04
Validation mae = 0.387554
Epoch 16
Loss = 4.9250e-02, PNorm = 66.6946, GNorm = 0.5758, lr_0 = 3.1599e-04
Loss = 5.8533e-02, PNorm = 66.6993, GNorm = 0.5749, lr_0 = 3.1577e-04
Loss = 6.3346e-02, PNorm = 66.7088, GNorm = 0.4840, lr_0 = 3.1556e-04
Loss = 5.4872e-02, PNorm = 66.7160, GNorm = 0.6281, lr_0 = 3.1534e-04
Loss = 6.1381e-02, PNorm = 66.7216, GNorm = 0.5630, lr_0 = 3.1512e-04
Loss = 5.7967e-02, PNorm = 66.7300, GNorm = 0.4919, lr_0 = 3.1491e-04
Loss = 5.4875e-02, PNorm = 66.7397, GNorm = 0.5022, lr_0 = 3.1469e-04
Loss = 6.5405e-02, PNorm = 66.7458, GNorm = 0.4800, lr_0 = 3.1448e-04
Loss = 6.8562e-02, PNorm = 66.7509, GNorm = 0.4644, lr_0 = 3.1426e-04
Loss = 6.3580e-02, PNorm = 66.7590, GNorm = 0.5365, lr_0 = 3.1405e-04
Loss = 5.4340e-02, PNorm = 66.7666, GNorm = 0.4262, lr_0 = 3.1383e-04
Loss = 5.6262e-02, PNorm = 66.7731, GNorm = 0.9504, lr_0 = 3.1362e-04
Loss = 5.6765e-02, PNorm = 66.7806, GNorm = 0.5357, lr_0 = 3.1340e-04
Loss = 6.7891e-02, PNorm = 66.7858, GNorm = 0.8090, lr_0 = 3.1319e-04
Loss = 5.2903e-02, PNorm = 66.7929, GNorm = 0.6187, lr_0 = 3.1297e-04
Loss = 5.5368e-02, PNorm = 66.8008, GNorm = 0.6781, lr_0 = 3.1276e-04
Loss = 5.7725e-02, PNorm = 66.8113, GNorm = 0.5009, lr_0 = 3.1254e-04
Loss = 7.1758e-02, PNorm = 66.8198, GNorm = 0.8900, lr_0 = 3.1233e-04
Loss = 6.1436e-02, PNorm = 66.8290, GNorm = 0.5136, lr_0 = 3.1212e-04
Loss = 5.6718e-02, PNorm = 66.8377, GNorm = 0.5863, lr_0 = 3.1190e-04
Loss = 5.7245e-02, PNorm = 66.8455, GNorm = 0.4842, lr_0 = 3.1169e-04
Loss = 6.3850e-02, PNorm = 66.8501, GNorm = 0.5314, lr_0 = 3.1147e-04
Loss = 5.5150e-02, PNorm = 66.8534, GNorm = 0.5253, lr_0 = 3.1126e-04
Loss = 6.1773e-02, PNorm = 66.8574, GNorm = 0.6246, lr_0 = 3.1105e-04
Loss = 5.7758e-02, PNorm = 66.8664, GNorm = 0.6120, lr_0 = 3.1083e-04
Loss = 5.5243e-02, PNorm = 66.8728, GNorm = 0.4773, lr_0 = 3.1062e-04
Loss = 5.5141e-02, PNorm = 66.8764, GNorm = 0.3825, lr_0 = 3.1041e-04
Loss = 6.0508e-02, PNorm = 66.8808, GNorm = 0.3945, lr_0 = 3.1020e-04
Loss = 6.4776e-02, PNorm = 66.8874, GNorm = 0.8365, lr_0 = 3.0998e-04
Loss = 5.8801e-02, PNorm = 66.8938, GNorm = 0.5583, lr_0 = 3.0977e-04
Loss = 5.3643e-02, PNorm = 66.9007, GNorm = 0.4687, lr_0 = 3.0956e-04
Loss = 5.6827e-02, PNorm = 66.9057, GNorm = 0.6614, lr_0 = 3.0935e-04
Loss = 6.5557e-02, PNorm = 66.9105, GNorm = 1.0180, lr_0 = 3.0914e-04
Loss = 7.0080e-02, PNorm = 66.9167, GNorm = 0.6775, lr_0 = 3.0892e-04
Loss = 5.5386e-02, PNorm = 66.9228, GNorm = 0.4940, lr_0 = 3.0871e-04
Loss = 5.8139e-02, PNorm = 66.9276, GNorm = 0.6031, lr_0 = 3.0850e-04
Loss = 5.8928e-02, PNorm = 66.9323, GNorm = 0.8614, lr_0 = 3.0829e-04
Loss = 6.7922e-02, PNorm = 66.9381, GNorm = 0.5663, lr_0 = 3.0808e-04
Loss = 7.5742e-02, PNorm = 66.9469, GNorm = 0.5041, lr_0 = 3.0787e-04
Loss = 5.2641e-02, PNorm = 66.9547, GNorm = 0.5574, lr_0 = 3.0766e-04
Loss = 6.1512e-02, PNorm = 66.9622, GNorm = 0.9772, lr_0 = 3.0745e-04
Loss = 5.7789e-02, PNorm = 66.9679, GNorm = 0.7340, lr_0 = 3.0723e-04
Loss = 6.0317e-02, PNorm = 66.9729, GNorm = 0.5364, lr_0 = 3.0702e-04
Loss = 5.8795e-02, PNorm = 66.9787, GNorm = 0.6945, lr_0 = 3.0681e-04
Loss = 6.1822e-02, PNorm = 66.9824, GNorm = 0.3852, lr_0 = 3.0660e-04
Loss = 5.8042e-02, PNorm = 66.9871, GNorm = 0.3327, lr_0 = 3.0639e-04
Loss = 6.7520e-02, PNorm = 66.9933, GNorm = 0.5502, lr_0 = 3.0618e-04
Loss = 6.4405e-02, PNorm = 66.9982, GNorm = 0.6976, lr_0 = 3.0597e-04
Loss = 6.2963e-02, PNorm = 67.0025, GNorm = 0.8626, lr_0 = 3.0576e-04
Loss = 6.0119e-02, PNorm = 67.0073, GNorm = 0.9025, lr_0 = 3.0555e-04
Loss = 5.8410e-02, PNorm = 67.0141, GNorm = 0.6527, lr_0 = 3.0535e-04
Loss = 6.2064e-02, PNorm = 67.0213, GNorm = 0.5579, lr_0 = 3.0514e-04
Loss = 6.4734e-02, PNorm = 67.0246, GNorm = 0.4523, lr_0 = 3.0493e-04
Loss = 5.1419e-02, PNorm = 67.0272, GNorm = 0.4798, lr_0 = 3.0472e-04
Loss = 5.5895e-02, PNorm = 67.0317, GNorm = 0.3907, lr_0 = 3.0451e-04
Loss = 5.4431e-02, PNorm = 67.0358, GNorm = 0.5213, lr_0 = 3.0430e-04
Loss = 5.9494e-02, PNorm = 67.0416, GNorm = 0.5310, lr_0 = 3.0409e-04
Loss = 5.9482e-02, PNorm = 67.0469, GNorm = 0.7424, lr_0 = 3.0388e-04
Loss = 6.4179e-02, PNorm = 67.0529, GNorm = 0.6227, lr_0 = 3.0368e-04
Loss = 5.7517e-02, PNorm = 67.0603, GNorm = 0.5258, lr_0 = 3.0347e-04
Loss = 6.1854e-02, PNorm = 67.0656, GNorm = 0.6061, lr_0 = 3.0326e-04
Loss = 6.4660e-02, PNorm = 67.0696, GNorm = 0.5195, lr_0 = 3.0305e-04
Loss = 5.5033e-02, PNorm = 67.0767, GNorm = 0.4513, lr_0 = 3.0284e-04
Loss = 6.2218e-02, PNorm = 67.0816, GNorm = 0.5282, lr_0 = 3.0264e-04
Loss = 5.5937e-02, PNorm = 67.0851, GNorm = 1.1555, lr_0 = 3.0243e-04
Loss = 6.1342e-02, PNorm = 67.0895, GNorm = 0.5826, lr_0 = 3.0222e-04
Loss = 6.1024e-02, PNorm = 67.0958, GNorm = 0.4734, lr_0 = 3.0202e-04
Loss = 5.9907e-02, PNorm = 67.0996, GNorm = 0.5820, lr_0 = 3.0181e-04
Loss = 5.7050e-02, PNorm = 67.1028, GNorm = 0.4724, lr_0 = 3.0160e-04
Loss = 5.9836e-02, PNorm = 67.1084, GNorm = 0.4786, lr_0 = 3.0140e-04
Loss = 5.7849e-02, PNorm = 67.1140, GNorm = 0.4740, lr_0 = 3.0119e-04
Loss = 5.5414e-02, PNorm = 67.1207, GNorm = 0.6658, lr_0 = 3.0098e-04
Loss = 5.8281e-02, PNorm = 67.1286, GNorm = 0.5271, lr_0 = 3.0078e-04
Loss = 5.1975e-02, PNorm = 67.1319, GNorm = 0.5772, lr_0 = 3.0057e-04
Loss = 5.8875e-02, PNorm = 67.1361, GNorm = 0.7788, lr_0 = 3.0036e-04
Loss = 5.5066e-02, PNorm = 67.1400, GNorm = 0.4790, lr_0 = 3.0016e-04
Loss = 6.0942e-02, PNorm = 67.1421, GNorm = 0.5262, lr_0 = 2.9995e-04
Loss = 6.2005e-02, PNorm = 67.1469, GNorm = 0.5474, lr_0 = 2.9975e-04
Loss = 6.8631e-02, PNorm = 67.1539, GNorm = 0.4669, lr_0 = 2.9954e-04
Loss = 6.2416e-02, PNorm = 67.1632, GNorm = 0.9812, lr_0 = 2.9934e-04
Loss = 6.4161e-02, PNorm = 67.1715, GNorm = 0.6818, lr_0 = 2.9913e-04
Loss = 6.5046e-02, PNorm = 67.1776, GNorm = 0.5134, lr_0 = 2.9893e-04
Loss = 5.6525e-02, PNorm = 67.1805, GNorm = 0.6297, lr_0 = 2.9872e-04
Loss = 6.2061e-02, PNorm = 67.1857, GNorm = 0.5559, lr_0 = 2.9852e-04
Loss = 5.1976e-02, PNorm = 67.1933, GNorm = 0.7494, lr_0 = 2.9831e-04
Loss = 6.7829e-02, PNorm = 67.1990, GNorm = 0.5168, lr_0 = 2.9811e-04
Loss = 6.7122e-02, PNorm = 67.2073, GNorm = 0.8393, lr_0 = 2.9790e-04
Loss = 6.7222e-02, PNorm = 67.2138, GNorm = 1.1478, lr_0 = 2.9770e-04
Loss = 6.1542e-02, PNorm = 67.2195, GNorm = 0.6752, lr_0 = 2.9750e-04
Loss = 5.8021e-02, PNorm = 67.2255, GNorm = 0.6457, lr_0 = 2.9729e-04
Loss = 6.6153e-02, PNorm = 67.2299, GNorm = 0.7409, lr_0 = 2.9709e-04
Loss = 6.0942e-02, PNorm = 67.2341, GNorm = 0.7670, lr_0 = 2.9689e-04
Loss = 6.1549e-02, PNorm = 67.2386, GNorm = 0.4553, lr_0 = 2.9668e-04
Loss = 5.7352e-02, PNorm = 67.2443, GNorm = 0.4498, lr_0 = 2.9648e-04
Loss = 6.0182e-02, PNorm = 67.2512, GNorm = 0.7075, lr_0 = 2.9628e-04
Loss = 5.6108e-02, PNorm = 67.2568, GNorm = 0.8616, lr_0 = 2.9607e-04
Loss = 5.8591e-02, PNorm = 67.2617, GNorm = 0.5128, lr_0 = 2.9587e-04
Loss = 5.5293e-02, PNorm = 67.2642, GNorm = 0.5146, lr_0 = 2.9567e-04
Loss = 6.5014e-02, PNorm = 67.2684, GNorm = 0.6551, lr_0 = 2.9546e-04
Loss = 6.5021e-02, PNorm = 67.2748, GNorm = 0.5387, lr_0 = 2.9526e-04
Loss = 6.6915e-02, PNorm = 67.2815, GNorm = 0.8885, lr_0 = 2.9506e-04
Loss = 6.7607e-02, PNorm = 67.2857, GNorm = 0.6927, lr_0 = 2.9486e-04
Loss = 6.9563e-02, PNorm = 67.2950, GNorm = 0.3739, lr_0 = 2.9466e-04
Loss = 5.9409e-02, PNorm = 67.3007, GNorm = 0.7510, lr_0 = 2.9445e-04
Loss = 6.9144e-02, PNorm = 67.3065, GNorm = 0.5594, lr_0 = 2.9425e-04
Loss = 6.1806e-02, PNorm = 67.3111, GNorm = 0.4327, lr_0 = 2.9405e-04
Loss = 7.2797e-02, PNorm = 67.3176, GNorm = 0.8933, lr_0 = 2.9385e-04
Loss = 8.1309e-02, PNorm = 67.3244, GNorm = 0.5844, lr_0 = 2.9365e-04
Loss = 6.8785e-02, PNorm = 67.3302, GNorm = 0.9217, lr_0 = 2.9345e-04
Loss = 6.7579e-02, PNorm = 67.3373, GNorm = 0.6990, lr_0 = 2.9325e-04
Loss = 6.5299e-02, PNorm = 67.3432, GNorm = 0.5507, lr_0 = 2.9305e-04
Loss = 6.5019e-02, PNorm = 67.3487, GNorm = 0.8601, lr_0 = 2.9284e-04
Loss = 6.3490e-02, PNorm = 67.3550, GNorm = 0.5207, lr_0 = 2.9264e-04
Loss = 7.4189e-02, PNorm = 67.3601, GNorm = 0.9668, lr_0 = 2.9244e-04
Loss = 6.7071e-02, PNorm = 67.3661, GNorm = 0.6708, lr_0 = 2.9224e-04
Loss = 6.0016e-02, PNorm = 67.3713, GNorm = 0.4762, lr_0 = 2.9204e-04
Loss = 7.1007e-02, PNorm = 67.3733, GNorm = 0.5301, lr_0 = 2.9184e-04
Loss = 5.8144e-02, PNorm = 67.3778, GNorm = 0.4739, lr_0 = 2.9164e-04
Loss = 5.5994e-02, PNorm = 67.3823, GNorm = 0.6787, lr_0 = 2.9144e-04
Loss = 5.7758e-02, PNorm = 67.3878, GNorm = 0.6832, lr_0 = 2.9124e-04
Validation mae = 0.385886
Epoch 17
Loss = 5.0520e-02, PNorm = 67.3939, GNorm = 0.4042, lr_0 = 2.9104e-04
Loss = 5.4579e-02, PNorm = 67.4013, GNorm = 0.7992, lr_0 = 2.9084e-04
Loss = 5.2666e-02, PNorm = 67.4094, GNorm = 0.4711, lr_0 = 2.9065e-04
Loss = 5.6513e-02, PNorm = 67.4146, GNorm = 0.6598, lr_0 = 2.9045e-04
Loss = 5.2939e-02, PNorm = 67.4218, GNorm = 0.6078, lr_0 = 2.9025e-04
Loss = 5.2400e-02, PNorm = 67.4289, GNorm = 0.8840, lr_0 = 2.9005e-04
Loss = 5.4736e-02, PNorm = 67.4347, GNorm = 0.9390, lr_0 = 2.8985e-04
Loss = 4.8706e-02, PNorm = 67.4376, GNorm = 0.5064, lr_0 = 2.8965e-04
Loss = 5.5532e-02, PNorm = 67.4402, GNorm = 0.4669, lr_0 = 2.8945e-04
Loss = 5.3101e-02, PNorm = 67.4455, GNorm = 0.4289, lr_0 = 2.8925e-04
Loss = 4.5796e-02, PNorm = 67.4515, GNorm = 0.3951, lr_0 = 2.8906e-04
Loss = 6.1131e-02, PNorm = 67.4563, GNorm = 0.6822, lr_0 = 2.8886e-04
Loss = 5.5728e-02, PNorm = 67.4626, GNorm = 0.4372, lr_0 = 2.8866e-04
Loss = 5.4961e-02, PNorm = 67.4691, GNorm = 0.9144, lr_0 = 2.8846e-04
Loss = 4.5305e-02, PNorm = 67.4738, GNorm = 0.5529, lr_0 = 2.8826e-04
Loss = 6.1995e-02, PNorm = 67.4803, GNorm = 0.6608, lr_0 = 2.8807e-04
Loss = 4.9327e-02, PNorm = 67.4853, GNorm = 0.5115, lr_0 = 2.8787e-04
Loss = 5.9188e-02, PNorm = 67.4909, GNorm = 0.3655, lr_0 = 2.8767e-04
Loss = 5.7635e-02, PNorm = 67.4960, GNorm = 0.5836, lr_0 = 2.8748e-04
Loss = 5.3417e-02, PNorm = 67.5044, GNorm = 0.5223, lr_0 = 2.8728e-04
Loss = 5.5640e-02, PNorm = 67.5085, GNorm = 0.4991, lr_0 = 2.8708e-04
Loss = 5.1912e-02, PNorm = 67.5125, GNorm = 0.5406, lr_0 = 2.8689e-04
Loss = 5.4415e-02, PNorm = 67.5204, GNorm = 0.4832, lr_0 = 2.8669e-04
Loss = 5.4694e-02, PNorm = 67.5254, GNorm = 0.4544, lr_0 = 2.8649e-04
Loss = 5.6636e-02, PNorm = 67.5301, GNorm = 0.6745, lr_0 = 2.8630e-04
Loss = 5.5316e-02, PNorm = 67.5338, GNorm = 1.4743, lr_0 = 2.8610e-04
Loss = 5.3003e-02, PNorm = 67.5389, GNorm = 0.8425, lr_0 = 2.8590e-04
Loss = 5.0783e-02, PNorm = 67.5435, GNorm = 0.5889, lr_0 = 2.8571e-04
Loss = 5.1250e-02, PNorm = 67.5468, GNorm = 0.3743, lr_0 = 2.8551e-04
Loss = 4.7522e-02, PNorm = 67.5513, GNorm = 0.4839, lr_0 = 2.8532e-04
Loss = 4.9859e-02, PNorm = 67.5538, GNorm = 0.5732, lr_0 = 2.8512e-04
Loss = 4.7556e-02, PNorm = 67.5607, GNorm = 0.5905, lr_0 = 2.8493e-04
Loss = 6.1083e-02, PNorm = 67.5668, GNorm = 0.5018, lr_0 = 2.8473e-04
Loss = 5.1282e-02, PNorm = 67.5706, GNorm = 0.4880, lr_0 = 2.8454e-04
Loss = 5.4886e-02, PNorm = 67.5722, GNorm = 0.4886, lr_0 = 2.8434e-04
Loss = 5.8588e-02, PNorm = 67.5780, GNorm = 0.5461, lr_0 = 2.8415e-04
Loss = 6.3049e-02, PNorm = 67.5834, GNorm = 0.5585, lr_0 = 2.8395e-04
Loss = 6.2702e-02, PNorm = 67.5866, GNorm = 1.0049, lr_0 = 2.8376e-04
Loss = 6.0294e-02, PNorm = 67.5919, GNorm = 0.6646, lr_0 = 2.8356e-04
Loss = 5.8149e-02, PNorm = 67.5982, GNorm = 0.4914, lr_0 = 2.8337e-04
Loss = 5.3221e-02, PNorm = 67.6086, GNorm = 0.4452, lr_0 = 2.8317e-04
Loss = 5.7815e-02, PNorm = 67.6177, GNorm = 0.5550, lr_0 = 2.8298e-04
Loss = 5.0782e-02, PNorm = 67.6259, GNorm = 0.5725, lr_0 = 2.8279e-04
Loss = 6.2164e-02, PNorm = 67.6317, GNorm = 0.6771, lr_0 = 2.8259e-04
Loss = 5.2982e-02, PNorm = 67.6354, GNorm = 0.5354, lr_0 = 2.8240e-04
Loss = 5.7624e-02, PNorm = 67.6403, GNorm = 0.4973, lr_0 = 2.8221e-04
Loss = 5.1700e-02, PNorm = 67.6460, GNorm = 0.3669, lr_0 = 2.8201e-04
Loss = 5.0671e-02, PNorm = 67.6517, GNorm = 0.4800, lr_0 = 2.8182e-04
Loss = 5.8637e-02, PNorm = 67.6583, GNorm = 0.6596, lr_0 = 2.8163e-04
Loss = 5.7349e-02, PNorm = 67.6642, GNorm = 0.4754, lr_0 = 2.8143e-04
Loss = 5.9960e-02, PNorm = 67.6721, GNorm = 0.4576, lr_0 = 2.8124e-04
Loss = 7.4865e-02, PNorm = 67.6779, GNorm = 0.6065, lr_0 = 2.8105e-04
Loss = 5.7982e-02, PNorm = 67.6827, GNorm = 0.4932, lr_0 = 2.8085e-04
Loss = 5.6564e-02, PNorm = 67.6870, GNorm = 0.4253, lr_0 = 2.8066e-04
Loss = 6.5264e-02, PNorm = 67.6903, GNorm = 0.5507, lr_0 = 2.8047e-04
Loss = 5.9031e-02, PNorm = 67.6947, GNorm = 0.5409, lr_0 = 2.8028e-04
Loss = 6.6172e-02, PNorm = 67.7024, GNorm = 0.7178, lr_0 = 2.8009e-04
Loss = 5.9852e-02, PNorm = 67.7085, GNorm = 0.6673, lr_0 = 2.7989e-04
Loss = 5.5873e-02, PNorm = 67.7139, GNorm = 0.4598, lr_0 = 2.7970e-04
Loss = 5.5131e-02, PNorm = 67.7202, GNorm = 1.1166, lr_0 = 2.7951e-04
Loss = 7.1750e-02, PNorm = 67.7260, GNorm = 0.7993, lr_0 = 2.7932e-04
Loss = 7.1185e-02, PNorm = 67.7326, GNorm = 0.4529, lr_0 = 2.7913e-04
Loss = 5.7889e-02, PNorm = 67.7380, GNorm = 0.4322, lr_0 = 2.7894e-04
Loss = 5.7620e-02, PNorm = 67.7432, GNorm = 0.4245, lr_0 = 2.7875e-04
Loss = 6.4487e-02, PNorm = 67.7495, GNorm = 0.5775, lr_0 = 2.7855e-04
Loss = 5.5294e-02, PNorm = 67.7551, GNorm = 0.5723, lr_0 = 2.7836e-04
Loss = 5.7046e-02, PNorm = 67.7596, GNorm = 0.5344, lr_0 = 2.7817e-04
Loss = 5.7791e-02, PNorm = 67.7651, GNorm = 0.5242, lr_0 = 2.7798e-04
Loss = 6.1175e-02, PNorm = 67.7695, GNorm = 0.8799, lr_0 = 2.7779e-04
Loss = 6.7463e-02, PNorm = 67.7751, GNorm = 0.5210, lr_0 = 2.7760e-04
Loss = 5.3593e-02, PNorm = 67.7805, GNorm = 0.5424, lr_0 = 2.7741e-04
Loss = 5.8359e-02, PNorm = 67.7835, GNorm = 0.5182, lr_0 = 2.7722e-04
Loss = 5.9105e-02, PNorm = 67.7862, GNorm = 0.4152, lr_0 = 2.7703e-04
Loss = 6.0332e-02, PNorm = 67.7901, GNorm = 0.6399, lr_0 = 2.7684e-04
Loss = 6.3520e-02, PNorm = 67.7951, GNorm = 0.4382, lr_0 = 2.7665e-04
Loss = 5.9563e-02, PNorm = 67.7998, GNorm = 0.6278, lr_0 = 2.7646e-04
Loss = 5.9556e-02, PNorm = 67.8054, GNorm = 0.6489, lr_0 = 2.7627e-04
Loss = 5.6453e-02, PNorm = 67.8116, GNorm = 0.4936, lr_0 = 2.7608e-04
Loss = 5.3786e-02, PNorm = 67.8142, GNorm = 0.6237, lr_0 = 2.7590e-04
Loss = 5.7428e-02, PNorm = 67.8172, GNorm = 0.5871, lr_0 = 2.7571e-04
Loss = 5.9974e-02, PNorm = 67.8194, GNorm = 0.6326, lr_0 = 2.7552e-04
Loss = 5.8375e-02, PNorm = 67.8236, GNorm = 0.6597, lr_0 = 2.7533e-04
Loss = 6.1451e-02, PNorm = 67.8286, GNorm = 0.9175, lr_0 = 2.7514e-04
Loss = 4.7428e-02, PNorm = 67.8319, GNorm = 0.3780, lr_0 = 2.7495e-04
Loss = 5.6975e-02, PNorm = 67.8369, GNorm = 0.5490, lr_0 = 2.7476e-04
Loss = 5.2277e-02, PNorm = 67.8411, GNorm = 0.6671, lr_0 = 2.7457e-04
Loss = 6.0395e-02, PNorm = 67.8471, GNorm = 0.6766, lr_0 = 2.7439e-04
Loss = 5.5552e-02, PNorm = 67.8504, GNorm = 0.4903, lr_0 = 2.7420e-04
Loss = 5.6591e-02, PNorm = 67.8557, GNorm = 0.5667, lr_0 = 2.7401e-04
Loss = 6.3628e-02, PNorm = 67.8572, GNorm = 0.6737, lr_0 = 2.7382e-04
Loss = 6.2000e-02, PNorm = 67.8592, GNorm = 0.4496, lr_0 = 2.7364e-04
Loss = 5.6486e-02, PNorm = 67.8645, GNorm = 0.4675, lr_0 = 2.7345e-04
Loss = 6.7289e-02, PNorm = 67.8722, GNorm = 0.5918, lr_0 = 2.7326e-04
Loss = 6.2540e-02, PNorm = 67.8774, GNorm = 0.7259, lr_0 = 2.7307e-04
Loss = 6.9173e-02, PNorm = 67.8813, GNorm = 0.7622, lr_0 = 2.7289e-04
Loss = 6.6979e-02, PNorm = 67.8877, GNorm = 0.9433, lr_0 = 2.7270e-04
Loss = 5.9237e-02, PNorm = 67.8924, GNorm = 0.6539, lr_0 = 2.7251e-04
Loss = 5.1040e-02, PNorm = 67.8976, GNorm = 0.5307, lr_0 = 2.7233e-04
Loss = 5.4210e-02, PNorm = 67.9019, GNorm = 0.5695, lr_0 = 2.7214e-04
Loss = 6.1199e-02, PNorm = 67.9059, GNorm = 0.6198, lr_0 = 2.7195e-04
Loss = 7.0301e-02, PNorm = 67.9118, GNorm = 0.7482, lr_0 = 2.7177e-04
Loss = 5.2037e-02, PNorm = 67.9172, GNorm = 0.7625, lr_0 = 2.7158e-04
Loss = 6.3063e-02, PNorm = 67.9233, GNorm = 0.4112, lr_0 = 2.7139e-04
Loss = 6.1452e-02, PNorm = 67.9283, GNorm = 1.2125, lr_0 = 2.7121e-04
Loss = 5.9671e-02, PNorm = 67.9329, GNorm = 0.6395, lr_0 = 2.7102e-04
Loss = 6.2835e-02, PNorm = 67.9359, GNorm = 0.5370, lr_0 = 2.7084e-04
Loss = 5.8597e-02, PNorm = 67.9388, GNorm = 0.4960, lr_0 = 2.7065e-04
Loss = 5.8558e-02, PNorm = 67.9434, GNorm = 0.5655, lr_0 = 2.7047e-04
Loss = 5.3547e-02, PNorm = 67.9441, GNorm = 0.7672, lr_0 = 2.7028e-04
Loss = 6.4368e-02, PNorm = 67.9466, GNorm = 0.6182, lr_0 = 2.7010e-04
Loss = 5.6490e-02, PNorm = 67.9534, GNorm = 0.7299, lr_0 = 2.6991e-04
Loss = 5.8808e-02, PNorm = 67.9616, GNorm = 0.4471, lr_0 = 2.6973e-04
Loss = 5.8154e-02, PNorm = 67.9671, GNorm = 0.5253, lr_0 = 2.6954e-04
Loss = 5.5568e-02, PNorm = 67.9730, GNorm = 0.4567, lr_0 = 2.6936e-04
Loss = 5.6065e-02, PNorm = 67.9793, GNorm = 0.6581, lr_0 = 2.6917e-04
Loss = 6.5333e-02, PNorm = 67.9832, GNorm = 0.4516, lr_0 = 2.6899e-04
Loss = 6.2913e-02, PNorm = 67.9875, GNorm = 0.9365, lr_0 = 2.6880e-04
Loss = 5.9795e-02, PNorm = 67.9924, GNorm = 1.0307, lr_0 = 2.6862e-04
Loss = 5.8644e-02, PNorm = 67.9986, GNorm = 0.5730, lr_0 = 2.6844e-04
Loss = 5.9725e-02, PNorm = 68.0027, GNorm = 0.5495, lr_0 = 2.6825e-04
Validation mae = 0.387823
Epoch 18
Loss = 5.9777e-02, PNorm = 68.0044, GNorm = 0.5047, lr_0 = 2.6807e-04
Loss = 4.7068e-02, PNorm = 68.0112, GNorm = 0.6749, lr_0 = 2.6788e-04
Loss = 4.4979e-02, PNorm = 68.0175, GNorm = 0.6273, lr_0 = 2.6770e-04
Loss = 5.3690e-02, PNorm = 68.0230, GNorm = 0.4866, lr_0 = 2.6752e-04
Loss = 4.6859e-02, PNorm = 68.0298, GNorm = 0.6417, lr_0 = 2.6733e-04
Loss = 4.7816e-02, PNorm = 68.0324, GNorm = 0.5905, lr_0 = 2.6715e-04
Loss = 4.9418e-02, PNorm = 68.0370, GNorm = 0.3380, lr_0 = 2.6697e-04
Loss = 5.0657e-02, PNorm = 68.0431, GNorm = 0.7028, lr_0 = 2.6678e-04
Loss = 5.2432e-02, PNorm = 68.0479, GNorm = 0.6323, lr_0 = 2.6660e-04
Loss = 4.7478e-02, PNorm = 68.0516, GNorm = 0.5018, lr_0 = 2.6642e-04
Loss = 5.2863e-02, PNorm = 68.0562, GNorm = 0.7888, lr_0 = 2.6624e-04
Loss = 4.8725e-02, PNorm = 68.0604, GNorm = 0.5157, lr_0 = 2.6605e-04
Loss = 5.5206e-02, PNorm = 68.0657, GNorm = 0.5779, lr_0 = 2.6587e-04
Loss = 4.9190e-02, PNorm = 68.0708, GNorm = 0.4742, lr_0 = 2.6569e-04
Loss = 5.3230e-02, PNorm = 68.0754, GNorm = 0.5101, lr_0 = 2.6551e-04
Loss = 5.5500e-02, PNorm = 68.0791, GNorm = 0.8065, lr_0 = 2.6533e-04
Loss = 5.2824e-02, PNorm = 68.0838, GNorm = 0.6514, lr_0 = 2.6514e-04
Loss = 5.4282e-02, PNorm = 68.0900, GNorm = 0.5426, lr_0 = 2.6496e-04
Loss = 5.1864e-02, PNorm = 68.0950, GNorm = 0.6367, lr_0 = 2.6478e-04
Loss = 4.7967e-02, PNorm = 68.0992, GNorm = 0.9645, lr_0 = 2.6460e-04
Loss = 5.2661e-02, PNorm = 68.1037, GNorm = 0.6619, lr_0 = 2.6442e-04
Loss = 5.3598e-02, PNorm = 68.1084, GNorm = 0.5852, lr_0 = 2.6424e-04
Loss = 5.2605e-02, PNorm = 68.1126, GNorm = 0.5023, lr_0 = 2.6406e-04
Loss = 4.9018e-02, PNorm = 68.1188, GNorm = 0.4684, lr_0 = 2.6388e-04
Loss = 5.6861e-02, PNorm = 68.1235, GNorm = 0.4329, lr_0 = 2.6369e-04
Loss = 5.5196e-02, PNorm = 68.1278, GNorm = 0.6283, lr_0 = 2.6351e-04
Loss = 4.7308e-02, PNorm = 68.1350, GNorm = 0.5510, lr_0 = 2.6333e-04
Loss = 5.3755e-02, PNorm = 68.1417, GNorm = 1.2817, lr_0 = 2.6315e-04
Loss = 6.0716e-02, PNorm = 68.1493, GNorm = 0.7403, lr_0 = 2.6297e-04
Loss = 5.6439e-02, PNorm = 68.1553, GNorm = 0.5432, lr_0 = 2.6279e-04
Loss = 5.3941e-02, PNorm = 68.1610, GNorm = 0.4792, lr_0 = 2.6261e-04
Loss = 5.5784e-02, PNorm = 68.1651, GNorm = 0.4843, lr_0 = 2.6243e-04
Loss = 4.7806e-02, PNorm = 68.1701, GNorm = 0.5087, lr_0 = 2.6225e-04
Loss = 4.6231e-02, PNorm = 68.1746, GNorm = 0.4897, lr_0 = 2.6207e-04
Loss = 5.5977e-02, PNorm = 68.1799, GNorm = 0.5702, lr_0 = 2.6189e-04
Loss = 4.6628e-02, PNorm = 68.1866, GNorm = 0.3399, lr_0 = 2.6171e-04
Loss = 5.4339e-02, PNorm = 68.1909, GNorm = 0.7376, lr_0 = 2.6153e-04
Loss = 5.5194e-02, PNorm = 68.1946, GNorm = 0.4798, lr_0 = 2.6136e-04
Loss = 5.3698e-02, PNorm = 68.2010, GNorm = 0.5550, lr_0 = 2.6118e-04
Loss = 6.0739e-02, PNorm = 68.2058, GNorm = 0.8864, lr_0 = 2.6100e-04
Loss = 6.1211e-02, PNorm = 68.2130, GNorm = 0.7425, lr_0 = 2.6082e-04
Loss = 4.6202e-02, PNorm = 68.2192, GNorm = 0.5674, lr_0 = 2.6064e-04
Loss = 5.2543e-02, PNorm = 68.2241, GNorm = 0.5418, lr_0 = 2.6046e-04
Loss = 6.2004e-02, PNorm = 68.2305, GNorm = 0.4912, lr_0 = 2.6028e-04
Loss = 6.2194e-02, PNorm = 68.2363, GNorm = 0.6518, lr_0 = 2.6011e-04
Loss = 5.2772e-02, PNorm = 68.2402, GNorm = 0.5364, lr_0 = 2.5993e-04
Loss = 5.5146e-02, PNorm = 68.2444, GNorm = 0.4191, lr_0 = 2.5975e-04
Loss = 4.9569e-02, PNorm = 68.2480, GNorm = 0.5374, lr_0 = 2.5957e-04
Loss = 6.1549e-02, PNorm = 68.2520, GNorm = 0.5379, lr_0 = 2.5939e-04
Loss = 4.8976e-02, PNorm = 68.2560, GNorm = 0.7496, lr_0 = 2.5922e-04
Loss = 4.7777e-02, PNorm = 68.2608, GNorm = 0.5627, lr_0 = 2.5904e-04
Loss = 5.5691e-02, PNorm = 68.2671, GNorm = 0.8426, lr_0 = 2.5886e-04
Loss = 5.1071e-02, PNorm = 68.2718, GNorm = 0.4257, lr_0 = 2.5868e-04
Loss = 6.0383e-02, PNorm = 68.2751, GNorm = 0.7876, lr_0 = 2.5851e-04
Loss = 5.5341e-02, PNorm = 68.2801, GNorm = 0.4003, lr_0 = 2.5833e-04
Loss = 4.5730e-02, PNorm = 68.2852, GNorm = 0.4521, lr_0 = 2.5815e-04
Loss = 5.1968e-02, PNorm = 68.2887, GNorm = 0.4148, lr_0 = 2.5797e-04
Loss = 6.4036e-02, PNorm = 68.2946, GNorm = 0.7192, lr_0 = 2.5780e-04
Loss = 5.9785e-02, PNorm = 68.2993, GNorm = 0.5162, lr_0 = 2.5762e-04
Loss = 4.4140e-02, PNorm = 68.3043, GNorm = 0.4222, lr_0 = 2.5745e-04
Loss = 5.1353e-02, PNorm = 68.3082, GNorm = 0.7110, lr_0 = 2.5727e-04
Loss = 5.5339e-02, PNorm = 68.3101, GNorm = 0.5622, lr_0 = 2.5709e-04
Loss = 6.2167e-02, PNorm = 68.3157, GNorm = 0.6172, lr_0 = 2.5692e-04
Loss = 5.8552e-02, PNorm = 68.3199, GNorm = 0.4735, lr_0 = 2.5674e-04
Loss = 6.6781e-02, PNorm = 68.3239, GNorm = 0.4759, lr_0 = 2.5656e-04
Loss = 6.0849e-02, PNorm = 68.3280, GNorm = 0.6854, lr_0 = 2.5639e-04
Loss = 5.4153e-02, PNorm = 68.3326, GNorm = 0.7404, lr_0 = 2.5621e-04
Loss = 6.2081e-02, PNorm = 68.3371, GNorm = 0.7950, lr_0 = 2.5604e-04
Loss = 5.9599e-02, PNorm = 68.3437, GNorm = 0.5002, lr_0 = 2.5586e-04
Loss = 5.1494e-02, PNorm = 68.3476, GNorm = 0.5143, lr_0 = 2.5569e-04
Loss = 4.7372e-02, PNorm = 68.3515, GNorm = 0.4235, lr_0 = 2.5551e-04
Loss = 6.1484e-02, PNorm = 68.3557, GNorm = 0.5417, lr_0 = 2.5534e-04
Loss = 5.6275e-02, PNorm = 68.3613, GNorm = 0.5672, lr_0 = 2.5516e-04
Loss = 5.1669e-02, PNorm = 68.3650, GNorm = 0.6985, lr_0 = 2.5499e-04
Loss = 5.8581e-02, PNorm = 68.3710, GNorm = 0.4673, lr_0 = 2.5481e-04
Loss = 4.9515e-02, PNorm = 68.3759, GNorm = 0.4710, lr_0 = 2.5464e-04
Loss = 5.9718e-02, PNorm = 68.3782, GNorm = 0.4530, lr_0 = 2.5446e-04
Loss = 5.3395e-02, PNorm = 68.3812, GNorm = 1.0004, lr_0 = 2.5429e-04
Loss = 5.8156e-02, PNorm = 68.3863, GNorm = 0.4563, lr_0 = 2.5411e-04
Loss = 5.1918e-02, PNorm = 68.3917, GNorm = 0.3957, lr_0 = 2.5394e-04
Loss = 5.5238e-02, PNorm = 68.3964, GNorm = 0.7434, lr_0 = 2.5377e-04
Loss = 6.2620e-02, PNorm = 68.3993, GNorm = 0.6211, lr_0 = 2.5359e-04
Loss = 6.0630e-02, PNorm = 68.4017, GNorm = 0.4892, lr_0 = 2.5342e-04
Loss = 5.3130e-02, PNorm = 68.4067, GNorm = 0.7104, lr_0 = 2.5325e-04
Loss = 5.5384e-02, PNorm = 68.4129, GNorm = 0.5834, lr_0 = 2.5307e-04
Loss = 4.7956e-02, PNorm = 68.4177, GNorm = 0.3726, lr_0 = 2.5290e-04
Loss = 5.6207e-02, PNorm = 68.4211, GNorm = 1.0194, lr_0 = 2.5273e-04
Loss = 5.9953e-02, PNorm = 68.4249, GNorm = 0.4175, lr_0 = 2.5255e-04
Loss = 5.7537e-02, PNorm = 68.4322, GNorm = 0.6882, lr_0 = 2.5238e-04
Loss = 5.0601e-02, PNorm = 68.4387, GNorm = 0.7174, lr_0 = 2.5221e-04
Loss = 5.2672e-02, PNorm = 68.4417, GNorm = 0.4441, lr_0 = 2.5203e-04
Loss = 5.5328e-02, PNorm = 68.4455, GNorm = 0.7355, lr_0 = 2.5186e-04
Loss = 5.2744e-02, PNorm = 68.4503, GNorm = 0.4038, lr_0 = 2.5169e-04
Loss = 5.1618e-02, PNorm = 68.4545, GNorm = 0.5057, lr_0 = 2.5152e-04
Loss = 5.6140e-02, PNorm = 68.4573, GNorm = 0.4783, lr_0 = 2.5134e-04
Loss = 5.7440e-02, PNorm = 68.4606, GNorm = 0.4903, lr_0 = 2.5117e-04
Loss = 5.2972e-02, PNorm = 68.4632, GNorm = 0.5334, lr_0 = 2.5100e-04
Loss = 4.9430e-02, PNorm = 68.4660, GNorm = 0.4260, lr_0 = 2.5083e-04
Loss = 5.3896e-02, PNorm = 68.4690, GNorm = 0.4642, lr_0 = 2.5066e-04
Loss = 4.9176e-02, PNorm = 68.4709, GNorm = 0.4938, lr_0 = 2.5048e-04
Loss = 5.3116e-02, PNorm = 68.4756, GNorm = 0.4467, lr_0 = 2.5031e-04
Loss = 5.4554e-02, PNorm = 68.4804, GNorm = 0.5521, lr_0 = 2.5014e-04
Loss = 5.5452e-02, PNorm = 68.4844, GNorm = 0.5903, lr_0 = 2.4997e-04
Loss = 5.7445e-02, PNorm = 68.4907, GNorm = 0.5842, lr_0 = 2.4980e-04
Loss = 5.1772e-02, PNorm = 68.4970, GNorm = 1.2054, lr_0 = 2.4963e-04
Loss = 5.9458e-02, PNorm = 68.5023, GNorm = 0.5770, lr_0 = 2.4946e-04
Loss = 6.4110e-02, PNorm = 68.5064, GNorm = 0.5148, lr_0 = 2.4929e-04
Loss = 5.7228e-02, PNorm = 68.5108, GNorm = 0.6847, lr_0 = 2.4911e-04
Loss = 5.2904e-02, PNorm = 68.5173, GNorm = 0.5740, lr_0 = 2.4894e-04
Loss = 5.3676e-02, PNorm = 68.5216, GNorm = 0.5827, lr_0 = 2.4877e-04
Loss = 5.5253e-02, PNorm = 68.5254, GNorm = 0.5781, lr_0 = 2.4860e-04
Loss = 4.6035e-02, PNorm = 68.5272, GNorm = 0.4584, lr_0 = 2.4843e-04
Loss = 5.7231e-02, PNorm = 68.5306, GNorm = 0.8415, lr_0 = 2.4826e-04
Loss = 5.5814e-02, PNorm = 68.5349, GNorm = 0.4868, lr_0 = 2.4809e-04
Loss = 5.2340e-02, PNorm = 68.5413, GNorm = 0.6523, lr_0 = 2.4792e-04
Loss = 4.2511e-02, PNorm = 68.5476, GNorm = 0.7252, lr_0 = 2.4775e-04
Loss = 5.9996e-02, PNorm = 68.5532, GNorm = 0.5431, lr_0 = 2.4758e-04
Loss = 6.3112e-02, PNorm = 68.5567, GNorm = 0.5881, lr_0 = 2.4741e-04
Loss = 5.3220e-02, PNorm = 68.5608, GNorm = 0.4505, lr_0 = 2.4724e-04
Loss = 5.2643e-02, PNorm = 68.5659, GNorm = 0.4093, lr_0 = 2.4707e-04
Validation mae = 0.385474
Epoch 19
Loss = 4.4578e-02, PNorm = 68.5716, GNorm = 0.6615, lr_0 = 2.4690e-04
Loss = 5.0430e-02, PNorm = 68.5763, GNorm = 0.4929, lr_0 = 2.4674e-04
Loss = 5.1226e-02, PNorm = 68.5833, GNorm = 0.4498, lr_0 = 2.4657e-04
Loss = 4.6936e-02, PNorm = 68.5866, GNorm = 0.4801, lr_0 = 2.4640e-04
Loss = 4.6059e-02, PNorm = 68.5915, GNorm = 0.4238, lr_0 = 2.4623e-04
Loss = 4.7289e-02, PNorm = 68.5957, GNorm = 0.5710, lr_0 = 2.4606e-04
Loss = 4.9950e-02, PNorm = 68.6003, GNorm = 0.5088, lr_0 = 2.4589e-04
Loss = 5.2633e-02, PNorm = 68.6044, GNorm = 0.8451, lr_0 = 2.4572e-04
Loss = 5.1318e-02, PNorm = 68.6081, GNorm = 0.3923, lr_0 = 2.4556e-04
Loss = 5.1952e-02, PNorm = 68.6122, GNorm = 0.7404, lr_0 = 2.4539e-04
Loss = 5.5524e-02, PNorm = 68.6169, GNorm = 0.4833, lr_0 = 2.4522e-04
Loss = 4.5722e-02, PNorm = 68.6210, GNorm = 0.7157, lr_0 = 2.4505e-04
Loss = 4.7701e-02, PNorm = 68.6254, GNorm = 0.5165, lr_0 = 2.4488e-04
Loss = 5.0415e-02, PNorm = 68.6291, GNorm = 0.5992, lr_0 = 2.4472e-04
Loss = 4.6691e-02, PNorm = 68.6330, GNorm = 0.6166, lr_0 = 2.4455e-04
Loss = 4.8029e-02, PNorm = 68.6364, GNorm = 0.4592, lr_0 = 2.4438e-04
Loss = 4.4165e-02, PNorm = 68.6413, GNorm = 0.5003, lr_0 = 2.4421e-04
Loss = 4.5814e-02, PNorm = 68.6463, GNorm = 0.4427, lr_0 = 2.4405e-04
Loss = 5.0759e-02, PNorm = 68.6483, GNorm = 0.7269, lr_0 = 2.4388e-04
Loss = 4.5689e-02, PNorm = 68.6514, GNorm = 0.4511, lr_0 = 2.4371e-04
Loss = 4.8953e-02, PNorm = 68.6557, GNorm = 0.4613, lr_0 = 2.4354e-04
Loss = 4.4362e-02, PNorm = 68.6593, GNorm = 0.3606, lr_0 = 2.4338e-04
Loss = 4.3565e-02, PNorm = 68.6621, GNorm = 0.6972, lr_0 = 2.4321e-04
Loss = 5.3473e-02, PNorm = 68.6644, GNorm = 0.5130, lr_0 = 2.4304e-04
Loss = 6.1755e-02, PNorm = 68.6698, GNorm = 0.4789, lr_0 = 2.4288e-04
Loss = 4.9450e-02, PNorm = 68.6745, GNorm = 0.4437, lr_0 = 2.4271e-04
Loss = 5.5150e-02, PNorm = 68.6801, GNorm = 0.9764, lr_0 = 2.4254e-04
Loss = 5.2913e-02, PNorm = 68.6861, GNorm = 0.3574, lr_0 = 2.4238e-04
Loss = 4.9870e-02, PNorm = 68.6903, GNorm = 0.5725, lr_0 = 2.4221e-04
Loss = 5.1712e-02, PNorm = 68.6951, GNorm = 1.1769, lr_0 = 2.4205e-04
Loss = 5.5712e-02, PNorm = 68.6996, GNorm = 0.4861, lr_0 = 2.4188e-04
Loss = 4.7699e-02, PNorm = 68.7039, GNorm = 0.5663, lr_0 = 2.4171e-04
Loss = 5.1471e-02, PNorm = 68.7088, GNorm = 0.4184, lr_0 = 2.4155e-04
Loss = 4.5134e-02, PNorm = 68.7127, GNorm = 0.4263, lr_0 = 2.4138e-04
Loss = 4.2043e-02, PNorm = 68.7152, GNorm = 0.5009, lr_0 = 2.4122e-04
Loss = 5.6629e-02, PNorm = 68.7184, GNorm = 0.6661, lr_0 = 2.4105e-04
Loss = 4.8179e-02, PNorm = 68.7241, GNorm = 0.6029, lr_0 = 2.4089e-04
Loss = 5.3252e-02, PNorm = 68.7284, GNorm = 0.5055, lr_0 = 2.4072e-04
Loss = 5.3675e-02, PNorm = 68.7315, GNorm = 0.5274, lr_0 = 2.4056e-04
Loss = 4.9206e-02, PNorm = 68.7365, GNorm = 0.5192, lr_0 = 2.4039e-04
Loss = 4.5980e-02, PNorm = 68.7415, GNorm = 0.4240, lr_0 = 2.4023e-04
Loss = 4.9977e-02, PNorm = 68.7457, GNorm = 0.3987, lr_0 = 2.4006e-04
Loss = 4.7619e-02, PNorm = 68.7487, GNorm = 0.5067, lr_0 = 2.3990e-04
Loss = 5.0469e-02, PNorm = 68.7539, GNorm = 0.4257, lr_0 = 2.3974e-04
Loss = 4.4944e-02, PNorm = 68.7557, GNorm = 0.7601, lr_0 = 2.3957e-04
Loss = 4.2520e-02, PNorm = 68.7585, GNorm = 0.7895, lr_0 = 2.3941e-04
Loss = 4.5525e-02, PNorm = 68.7631, GNorm = 0.4946, lr_0 = 2.3924e-04
Loss = 5.7651e-02, PNorm = 68.7694, GNorm = 0.4918, lr_0 = 2.3908e-04
Loss = 5.0098e-02, PNorm = 68.7745, GNorm = 0.4248, lr_0 = 2.3892e-04
Loss = 4.9144e-02, PNorm = 68.7777, GNorm = 0.6550, lr_0 = 2.3875e-04
Loss = 5.3275e-02, PNorm = 68.7811, GNorm = 0.3853, lr_0 = 2.3859e-04
Loss = 4.8492e-02, PNorm = 68.7832, GNorm = 0.3085, lr_0 = 2.3842e-04
Loss = 4.4361e-02, PNorm = 68.7872, GNorm = 0.8723, lr_0 = 2.3826e-04
Loss = 5.4788e-02, PNorm = 68.7927, GNorm = 1.2529, lr_0 = 2.3810e-04
Loss = 4.9128e-02, PNorm = 68.7970, GNorm = 0.7563, lr_0 = 2.3794e-04
Loss = 5.5015e-02, PNorm = 68.8011, GNorm = 0.6306, lr_0 = 2.3777e-04
Loss = 4.5462e-02, PNorm = 68.8054, GNorm = 0.4523, lr_0 = 2.3761e-04
Loss = 5.7748e-02, PNorm = 68.8069, GNorm = 1.0705, lr_0 = 2.3745e-04
Loss = 5.5554e-02, PNorm = 68.8112, GNorm = 0.5421, lr_0 = 2.3728e-04
Loss = 4.6122e-02, PNorm = 68.8170, GNorm = 0.5799, lr_0 = 2.3712e-04
Loss = 5.2882e-02, PNorm = 68.8224, GNorm = 0.6369, lr_0 = 2.3696e-04
Loss = 5.7124e-02, PNorm = 68.8266, GNorm = 0.4712, lr_0 = 2.3680e-04
Loss = 5.4559e-02, PNorm = 68.8320, GNorm = 0.8543, lr_0 = 2.3663e-04
Loss = 5.9197e-02, PNorm = 68.8377, GNorm = 0.7957, lr_0 = 2.3647e-04
Loss = 4.8379e-02, PNorm = 68.8437, GNorm = 0.5226, lr_0 = 2.3631e-04
Loss = 5.1170e-02, PNorm = 68.8475, GNorm = 0.4997, lr_0 = 2.3615e-04
Loss = 5.3332e-02, PNorm = 68.8517, GNorm = 0.7795, lr_0 = 2.3599e-04
Loss = 4.8861e-02, PNorm = 68.8556, GNorm = 0.4579, lr_0 = 2.3582e-04
Loss = 4.8050e-02, PNorm = 68.8603, GNorm = 0.4483, lr_0 = 2.3566e-04
Loss = 5.2158e-02, PNorm = 68.8653, GNorm = 0.3800, lr_0 = 2.3550e-04
Loss = 4.7877e-02, PNorm = 68.8677, GNorm = 0.5903, lr_0 = 2.3534e-04
Loss = 4.7230e-02, PNorm = 68.8692, GNorm = 0.4624, lr_0 = 2.3518e-04
Loss = 4.7287e-02, PNorm = 68.8736, GNorm = 0.6693, lr_0 = 2.3502e-04
Loss = 5.1628e-02, PNorm = 68.8781, GNorm = 0.4708, lr_0 = 2.3486e-04
Loss = 4.7365e-02, PNorm = 68.8825, GNorm = 0.5195, lr_0 = 2.3470e-04
Loss = 4.7409e-02, PNorm = 68.8881, GNorm = 0.3587, lr_0 = 2.3454e-04
Loss = 5.2133e-02, PNorm = 68.8928, GNorm = 0.4860, lr_0 = 2.3437e-04
Loss = 4.8431e-02, PNorm = 68.8973, GNorm = 0.3738, lr_0 = 2.3421e-04
Loss = 5.8456e-02, PNorm = 68.9011, GNorm = 0.8696, lr_0 = 2.3405e-04
Loss = 5.4572e-02, PNorm = 68.9067, GNorm = 0.4415, lr_0 = 2.3389e-04
Loss = 4.8766e-02, PNorm = 68.9083, GNorm = 0.5408, lr_0 = 2.3373e-04
Loss = 4.9244e-02, PNorm = 68.9101, GNorm = 0.7025, lr_0 = 2.3357e-04
Loss = 5.0001e-02, PNorm = 68.9137, GNorm = 0.6890, lr_0 = 2.3341e-04
Loss = 4.5014e-02, PNorm = 68.9178, GNorm = 0.4800, lr_0 = 2.3325e-04
Loss = 4.5665e-02, PNorm = 68.9201, GNorm = 0.7220, lr_0 = 2.3309e-04
Loss = 5.7398e-02, PNorm = 68.9229, GNorm = 0.5814, lr_0 = 2.3293e-04
Loss = 6.3639e-02, PNorm = 68.9274, GNorm = 0.7763, lr_0 = 2.3277e-04
Loss = 5.2638e-02, PNorm = 68.9313, GNorm = 0.5349, lr_0 = 2.3261e-04
Loss = 4.6813e-02, PNorm = 68.9356, GNorm = 0.6129, lr_0 = 2.3246e-04
Loss = 5.2247e-02, PNorm = 68.9408, GNorm = 0.4566, lr_0 = 2.3230e-04
Loss = 5.1134e-02, PNorm = 68.9444, GNorm = 0.5066, lr_0 = 2.3214e-04
Loss = 5.9042e-02, PNorm = 68.9488, GNorm = 0.6457, lr_0 = 2.3198e-04
Loss = 4.7812e-02, PNorm = 68.9546, GNorm = 0.4908, lr_0 = 2.3182e-04
Loss = 5.2166e-02, PNorm = 68.9585, GNorm = 0.5967, lr_0 = 2.3166e-04
Loss = 4.8184e-02, PNorm = 68.9642, GNorm = 0.5067, lr_0 = 2.3150e-04
Loss = 6.0614e-02, PNorm = 68.9681, GNorm = 0.4868, lr_0 = 2.3134e-04
Loss = 5.0751e-02, PNorm = 68.9717, GNorm = 0.5381, lr_0 = 2.3118e-04
Loss = 5.3729e-02, PNorm = 68.9748, GNorm = 0.7577, lr_0 = 2.3103e-04
Loss = 4.2121e-02, PNorm = 68.9780, GNorm = 0.6507, lr_0 = 2.3087e-04
Loss = 4.7731e-02, PNorm = 68.9820, GNorm = 0.5354, lr_0 = 2.3071e-04
Loss = 5.7811e-02, PNorm = 68.9871, GNorm = 0.6237, lr_0 = 2.3055e-04
Loss = 4.8111e-02, PNorm = 68.9912, GNorm = 0.4848, lr_0 = 2.3039e-04
Loss = 3.8540e-02, PNorm = 68.9940, GNorm = 0.5335, lr_0 = 2.3024e-04
Loss = 5.4624e-02, PNorm = 68.9975, GNorm = 0.7437, lr_0 = 2.3008e-04
Loss = 6.3926e-02, PNorm = 69.0017, GNorm = 1.1319, lr_0 = 2.2992e-04
Loss = 6.3249e-02, PNorm = 69.0062, GNorm = 0.5611, lr_0 = 2.2976e-04
Loss = 4.8078e-02, PNorm = 69.0103, GNorm = 0.4419, lr_0 = 2.2961e-04
Loss = 4.9520e-02, PNorm = 69.0140, GNorm = 0.3802, lr_0 = 2.2945e-04
Loss = 5.7762e-02, PNorm = 69.0184, GNorm = 0.6631, lr_0 = 2.2929e-04
Loss = 5.2167e-02, PNorm = 69.0250, GNorm = 0.5599, lr_0 = 2.2913e-04
Loss = 6.2241e-02, PNorm = 69.0279, GNorm = 0.5282, lr_0 = 2.2898e-04
Loss = 5.4472e-02, PNorm = 69.0301, GNorm = 0.4799, lr_0 = 2.2882e-04
Loss = 5.7361e-02, PNorm = 69.0332, GNorm = 0.5920, lr_0 = 2.2866e-04
Loss = 5.6557e-02, PNorm = 69.0380, GNorm = 0.5526, lr_0 = 2.2851e-04
Loss = 4.4315e-02, PNorm = 69.0430, GNorm = 0.5945, lr_0 = 2.2835e-04
Loss = 4.7567e-02, PNorm = 69.0467, GNorm = 0.6561, lr_0 = 2.2819e-04
Loss = 5.3427e-02, PNorm = 69.0500, GNorm = 0.9276, lr_0 = 2.2804e-04
Loss = 5.1983e-02, PNorm = 69.0538, GNorm = 0.6691, lr_0 = 2.2788e-04
Loss = 5.6862e-02, PNorm = 69.0580, GNorm = 0.5267, lr_0 = 2.2773e-04
Loss = 5.3397e-02, PNorm = 69.0609, GNorm = 0.4673, lr_0 = 2.2757e-04
Validation mae = 0.393281
Epoch 20
Loss = 4.1841e-02, PNorm = 69.0632, GNorm = 0.4412, lr_0 = 2.2741e-04
Loss = 4.9838e-02, PNorm = 69.0693, GNorm = 0.3923, lr_0 = 2.2726e-04
Loss = 4.4876e-02, PNorm = 69.0741, GNorm = 0.5871, lr_0 = 2.2710e-04
Loss = 3.8670e-02, PNorm = 69.0767, GNorm = 0.5107, lr_0 = 2.2695e-04
Loss = 4.4255e-02, PNorm = 69.0800, GNorm = 0.4832, lr_0 = 2.2679e-04
Loss = 4.4470e-02, PNorm = 69.0856, GNorm = 1.0484, lr_0 = 2.2664e-04
Loss = 4.7265e-02, PNorm = 69.0912, GNorm = 0.4386, lr_0 = 2.2648e-04
Loss = 4.8651e-02, PNorm = 69.0955, GNorm = 0.7698, lr_0 = 2.2632e-04
Loss = 3.8453e-02, PNorm = 69.0998, GNorm = 0.4015, lr_0 = 2.2617e-04
Loss = 4.1728e-02, PNorm = 69.1038, GNorm = 0.6046, lr_0 = 2.2601e-04
Loss = 4.9589e-02, PNorm = 69.1071, GNorm = 0.6253, lr_0 = 2.2586e-04
Loss = 4.0800e-02, PNorm = 69.1101, GNorm = 0.5107, lr_0 = 2.2571e-04
Loss = 4.1295e-02, PNorm = 69.1126, GNorm = 0.4399, lr_0 = 2.2555e-04
Loss = 4.4514e-02, PNorm = 69.1145, GNorm = 0.4095, lr_0 = 2.2540e-04
Loss = 4.1851e-02, PNorm = 69.1172, GNorm = 0.4331, lr_0 = 2.2524e-04
Loss = 4.5565e-02, PNorm = 69.1206, GNorm = 0.4138, lr_0 = 2.2509e-04
Loss = 4.8277e-02, PNorm = 69.1237, GNorm = 0.5687, lr_0 = 2.2493e-04
Loss = 4.0897e-02, PNorm = 69.1264, GNorm = 0.5621, lr_0 = 2.2478e-04
Loss = 4.3842e-02, PNorm = 69.1292, GNorm = 0.5160, lr_0 = 2.2463e-04
Loss = 4.5204e-02, PNorm = 69.1323, GNorm = 0.5220, lr_0 = 2.2447e-04
Loss = 4.5395e-02, PNorm = 69.1358, GNorm = 0.4803, lr_0 = 2.2432e-04
Loss = 4.2341e-02, PNorm = 69.1415, GNorm = 0.4576, lr_0 = 2.2416e-04
Loss = 4.6497e-02, PNorm = 69.1458, GNorm = 0.7178, lr_0 = 2.2401e-04
Loss = 4.2420e-02, PNorm = 69.1490, GNorm = 0.4698, lr_0 = 2.2386e-04
Loss = 5.4433e-02, PNorm = 69.1529, GNorm = 0.5035, lr_0 = 2.2370e-04
Loss = 4.6531e-02, PNorm = 69.1565, GNorm = 0.6774, lr_0 = 2.2355e-04
Loss = 5.3037e-02, PNorm = 69.1589, GNorm = 0.5796, lr_0 = 2.2340e-04
Loss = 4.5308e-02, PNorm = 69.1634, GNorm = 0.6166, lr_0 = 2.2324e-04
Loss = 6.1107e-02, PNorm = 69.1687, GNorm = 0.4414, lr_0 = 2.2309e-04
Loss = 5.4261e-02, PNorm = 69.1730, GNorm = 0.5679, lr_0 = 2.2294e-04
Loss = 5.0347e-02, PNorm = 69.1788, GNorm = 0.7471, lr_0 = 2.2279e-04
Loss = 4.5292e-02, PNorm = 69.1857, GNorm = 0.5231, lr_0 = 2.2263e-04
Loss = 4.5675e-02, PNorm = 69.1909, GNorm = 0.6437, lr_0 = 2.2248e-04
Loss = 4.5445e-02, PNorm = 69.1950, GNorm = 0.4504, lr_0 = 2.2233e-04
Loss = 5.0487e-02, PNorm = 69.1977, GNorm = 0.6277, lr_0 = 2.2218e-04
Loss = 5.4958e-02, PNorm = 69.2035, GNorm = 0.5679, lr_0 = 2.2202e-04
Loss = 4.3330e-02, PNorm = 69.2082, GNorm = 0.4351, lr_0 = 2.2187e-04
Loss = 5.0319e-02, PNorm = 69.2115, GNorm = 0.5493, lr_0 = 2.2172e-04
Loss = 4.9078e-02, PNorm = 69.2153, GNorm = 0.9605, lr_0 = 2.2157e-04
Loss = 5.2540e-02, PNorm = 69.2191, GNorm = 0.5212, lr_0 = 2.2142e-04
Loss = 4.8108e-02, PNorm = 69.2225, GNorm = 0.7724, lr_0 = 2.2126e-04
Loss = 5.1785e-02, PNorm = 69.2261, GNorm = 0.4147, lr_0 = 2.2111e-04
Loss = 4.8563e-02, PNorm = 69.2270, GNorm = 0.4249, lr_0 = 2.2096e-04
Loss = 5.1831e-02, PNorm = 69.2284, GNorm = 0.6875, lr_0 = 2.2081e-04
Loss = 4.8626e-02, PNorm = 69.2302, GNorm = 0.7553, lr_0 = 2.2066e-04
Loss = 4.9270e-02, PNorm = 69.2342, GNorm = 0.4389, lr_0 = 2.2051e-04
Loss = 4.9200e-02, PNorm = 69.2385, GNorm = 0.5896, lr_0 = 2.2036e-04
Loss = 4.6254e-02, PNorm = 69.2439, GNorm = 0.5143, lr_0 = 2.2021e-04
Loss = 4.3470e-02, PNorm = 69.2489, GNorm = 0.5854, lr_0 = 2.2005e-04
Loss = 4.0708e-02, PNorm = 69.2519, GNorm = 0.4791, lr_0 = 2.1990e-04
Loss = 4.6683e-02, PNorm = 69.2564, GNorm = 0.4262, lr_0 = 2.1975e-04
Loss = 4.3773e-02, PNorm = 69.2601, GNorm = 0.5550, lr_0 = 2.1960e-04
Loss = 4.4809e-02, PNorm = 69.2626, GNorm = 0.3602, lr_0 = 2.1945e-04
Loss = 5.5026e-02, PNorm = 69.2663, GNorm = 0.7217, lr_0 = 2.1930e-04
Loss = 3.7587e-02, PNorm = 69.2694, GNorm = 0.4811, lr_0 = 2.1915e-04
Loss = 5.0754e-02, PNorm = 69.2707, GNorm = 0.4291, lr_0 = 2.1900e-04
Loss = 4.7737e-02, PNorm = 69.2715, GNorm = 0.3932, lr_0 = 2.1885e-04
Loss = 4.8066e-02, PNorm = 69.2761, GNorm = 0.7451, lr_0 = 2.1870e-04
Loss = 4.1272e-02, PNorm = 69.2813, GNorm = 0.4094, lr_0 = 2.1855e-04
Loss = 4.8868e-02, PNorm = 69.2868, GNorm = 0.5804, lr_0 = 2.1840e-04
Loss = 4.6565e-02, PNorm = 69.2914, GNorm = 0.7437, lr_0 = 2.1825e-04
Loss = 4.7054e-02, PNorm = 69.2952, GNorm = 0.6720, lr_0 = 2.1810e-04
Loss = 4.8248e-02, PNorm = 69.2973, GNorm = 0.8972, lr_0 = 2.1795e-04
Loss = 5.3074e-02, PNorm = 69.2999, GNorm = 0.5729, lr_0 = 2.1780e-04
Loss = 4.3942e-02, PNorm = 69.3047, GNorm = 0.5155, lr_0 = 2.1765e-04
Loss = 4.7980e-02, PNorm = 69.3084, GNorm = 0.7584, lr_0 = 2.1751e-04
Loss = 4.8751e-02, PNorm = 69.3123, GNorm = 0.5830, lr_0 = 2.1736e-04
Loss = 5.1294e-02, PNorm = 69.3168, GNorm = 0.3971, lr_0 = 2.1721e-04
Loss = 5.0633e-02, PNorm = 69.3220, GNorm = 0.6299, lr_0 = 2.1706e-04
Loss = 4.4860e-02, PNorm = 69.3252, GNorm = 0.7325, lr_0 = 2.1691e-04
Loss = 4.3889e-02, PNorm = 69.3286, GNorm = 0.4762, lr_0 = 2.1676e-04
Loss = 4.6834e-02, PNorm = 69.3303, GNorm = 0.5233, lr_0 = 2.1661e-04
Loss = 4.5551e-02, PNorm = 69.3317, GNorm = 0.6557, lr_0 = 2.1646e-04
Loss = 5.0281e-02, PNorm = 69.3329, GNorm = 0.4792, lr_0 = 2.1632e-04
Loss = 4.5919e-02, PNorm = 69.3361, GNorm = 0.4178, lr_0 = 2.1617e-04
Loss = 4.8457e-02, PNorm = 69.3413, GNorm = 0.6100, lr_0 = 2.1602e-04
Loss = 4.7140e-02, PNorm = 69.3435, GNorm = 0.5735, lr_0 = 2.1587e-04
Loss = 4.9973e-02, PNorm = 69.3454, GNorm = 0.7715, lr_0 = 2.1572e-04
Loss = 4.6592e-02, PNorm = 69.3481, GNorm = 0.5241, lr_0 = 2.1558e-04
Loss = 5.1399e-02, PNorm = 69.3506, GNorm = 0.4631, lr_0 = 2.1543e-04
Loss = 4.6689e-02, PNorm = 69.3551, GNorm = 0.5857, lr_0 = 2.1528e-04
Loss = 5.2745e-02, PNorm = 69.3603, GNorm = 0.5812, lr_0 = 2.1513e-04
Loss = 4.7248e-02, PNorm = 69.3655, GNorm = 0.6466, lr_0 = 2.1499e-04
Loss = 5.1094e-02, PNorm = 69.3689, GNorm = 0.4468, lr_0 = 2.1484e-04
Loss = 4.7016e-02, PNorm = 69.3709, GNorm = 0.7132, lr_0 = 2.1469e-04
Loss = 4.9587e-02, PNorm = 69.3728, GNorm = 0.7823, lr_0 = 2.1454e-04
Loss = 5.1416e-02, PNorm = 69.3780, GNorm = 0.5993, lr_0 = 2.1440e-04
Loss = 4.9716e-02, PNorm = 69.3831, GNorm = 0.5231, lr_0 = 2.1425e-04
Loss = 4.2337e-02, PNorm = 69.3861, GNorm = 0.4547, lr_0 = 2.1410e-04
Loss = 4.4052e-02, PNorm = 69.3884, GNorm = 0.5525, lr_0 = 2.1396e-04
Loss = 5.5533e-02, PNorm = 69.3909, GNorm = 0.4380, lr_0 = 2.1381e-04
Loss = 4.6447e-02, PNorm = 69.3941, GNorm = 0.5227, lr_0 = 2.1366e-04
Loss = 4.2875e-02, PNorm = 69.3980, GNorm = 0.5225, lr_0 = 2.1352e-04
Loss = 4.3391e-02, PNorm = 69.4028, GNorm = 0.6073, lr_0 = 2.1337e-04
Loss = 4.8011e-02, PNorm = 69.4095, GNorm = 0.5247, lr_0 = 2.1323e-04
Loss = 4.8238e-02, PNorm = 69.4151, GNorm = 0.4232, lr_0 = 2.1308e-04
Loss = 4.9708e-02, PNorm = 69.4192, GNorm = 0.5039, lr_0 = 2.1293e-04
Loss = 4.7839e-02, PNorm = 69.4214, GNorm = 0.5073, lr_0 = 2.1279e-04
Loss = 5.2168e-02, PNorm = 69.4241, GNorm = 0.5972, lr_0 = 2.1264e-04
Loss = 5.2226e-02, PNorm = 69.4263, GNorm = 0.7061, lr_0 = 2.1250e-04
Loss = 4.6475e-02, PNorm = 69.4290, GNorm = 0.4294, lr_0 = 2.1235e-04
Loss = 5.0270e-02, PNorm = 69.4324, GNorm = 0.6030, lr_0 = 2.1221e-04
Loss = 4.9214e-02, PNorm = 69.4366, GNorm = 0.5094, lr_0 = 2.1206e-04
Loss = 5.1113e-02, PNorm = 69.4404, GNorm = 0.4633, lr_0 = 2.1191e-04
Loss = 5.6314e-02, PNorm = 69.4431, GNorm = 0.5245, lr_0 = 2.1177e-04
Loss = 5.5672e-02, PNorm = 69.4463, GNorm = 0.7400, lr_0 = 2.1162e-04
Loss = 4.5062e-02, PNorm = 69.4508, GNorm = 0.4897, lr_0 = 2.1148e-04
Loss = 4.0144e-02, PNorm = 69.4544, GNorm = 0.5622, lr_0 = 2.1133e-04
Loss = 4.7215e-02, PNorm = 69.4581, GNorm = 0.4628, lr_0 = 2.1119e-04
Loss = 4.7819e-02, PNorm = 69.4650, GNorm = 0.4847, lr_0 = 2.1104e-04
Loss = 4.1350e-02, PNorm = 69.4681, GNorm = 0.4944, lr_0 = 2.1090e-04
Loss = 5.2424e-02, PNorm = 69.4704, GNorm = 0.8497, lr_0 = 2.1076e-04
Loss = 5.5005e-02, PNorm = 69.4719, GNorm = 0.4882, lr_0 = 2.1061e-04
Loss = 4.8733e-02, PNorm = 69.4736, GNorm = 0.5742, lr_0 = 2.1047e-04
Loss = 4.0268e-02, PNorm = 69.4757, GNorm = 0.5131, lr_0 = 2.1032e-04
Loss = 4.5199e-02, PNorm = 69.4795, GNorm = 0.4815, lr_0 = 2.1018e-04
Loss = 5.5234e-02, PNorm = 69.4833, GNorm = 0.5858, lr_0 = 2.1003e-04
Loss = 5.0101e-02, PNorm = 69.4878, GNorm = 0.6707, lr_0 = 2.0989e-04
Loss = 4.5739e-02, PNorm = 69.4916, GNorm = 0.4338, lr_0 = 2.0975e-04
Loss = 4.5837e-02, PNorm = 69.4945, GNorm = 0.4500, lr_0 = 2.0960e-04
Validation mae = 0.385387
Epoch 21
Loss = 4.2799e-02, PNorm = 69.4982, GNorm = 0.7611, lr_0 = 2.0946e-04
Loss = 4.3904e-02, PNorm = 69.5005, GNorm = 0.5291, lr_0 = 2.0932e-04
Loss = 3.8434e-02, PNorm = 69.5020, GNorm = 0.5380, lr_0 = 2.0917e-04
Loss = 4.1970e-02, PNorm = 69.5043, GNorm = 0.7020, lr_0 = 2.0903e-04
Loss = 4.3100e-02, PNorm = 69.5075, GNorm = 0.5344, lr_0 = 2.0889e-04
Loss = 3.9856e-02, PNorm = 69.5102, GNorm = 0.6278, lr_0 = 2.0874e-04
Loss = 4.1999e-02, PNorm = 69.5128, GNorm = 0.4044, lr_0 = 2.0860e-04
Loss = 4.2239e-02, PNorm = 69.5168, GNorm = 0.3667, lr_0 = 2.0846e-04
Loss = 4.9297e-02, PNorm = 69.5186, GNorm = 0.4844, lr_0 = 2.0831e-04
Loss = 4.3944e-02, PNorm = 69.5210, GNorm = 0.4046, lr_0 = 2.0817e-04
Loss = 3.9948e-02, PNorm = 69.5241, GNorm = 0.3951, lr_0 = 2.0803e-04
Loss = 4.6159e-02, PNorm = 69.5288, GNorm = 0.9533, lr_0 = 2.0789e-04
Loss = 4.0843e-02, PNorm = 69.5323, GNorm = 0.4372, lr_0 = 2.0774e-04
Loss = 4.4223e-02, PNorm = 69.5360, GNorm = 0.5049, lr_0 = 2.0760e-04
Loss = 4.5488e-02, PNorm = 69.5388, GNorm = 0.4739, lr_0 = 2.0746e-04
Loss = 4.3638e-02, PNorm = 69.5416, GNorm = 0.4919, lr_0 = 2.0732e-04
Loss = 4.8107e-02, PNorm = 69.5459, GNorm = 0.5893, lr_0 = 2.0718e-04
Loss = 4.0436e-02, PNorm = 69.5513, GNorm = 0.4969, lr_0 = 2.0703e-04
Loss = 4.7860e-02, PNorm = 69.5551, GNorm = 0.9038, lr_0 = 2.0689e-04
Loss = 3.9206e-02, PNorm = 69.5590, GNorm = 0.3479, lr_0 = 2.0675e-04
Loss = 4.8188e-02, PNorm = 69.5619, GNorm = 0.4819, lr_0 = 2.0661e-04
Loss = 4.8090e-02, PNorm = 69.5655, GNorm = 0.4363, lr_0 = 2.0647e-04
Loss = 4.3936e-02, PNorm = 69.5687, GNorm = 0.6874, lr_0 = 2.0633e-04
Loss = 4.3159e-02, PNorm = 69.5727, GNorm = 0.5609, lr_0 = 2.0618e-04
Loss = 3.8767e-02, PNorm = 69.5751, GNorm = 0.7260, lr_0 = 2.0604e-04
Loss = 4.3661e-02, PNorm = 69.5794, GNorm = 0.5412, lr_0 = 2.0590e-04
Loss = 4.5215e-02, PNorm = 69.5840, GNorm = 0.4011, lr_0 = 2.0576e-04
Loss = 4.1491e-02, PNorm = 69.5878, GNorm = 0.5577, lr_0 = 2.0562e-04
Loss = 3.8739e-02, PNorm = 69.5908, GNorm = 0.6204, lr_0 = 2.0548e-04
Loss = 3.9474e-02, PNorm = 69.5940, GNorm = 0.5016, lr_0 = 2.0534e-04
Loss = 5.4104e-02, PNorm = 69.5976, GNorm = 0.6643, lr_0 = 2.0520e-04
Loss = 4.6357e-02, PNorm = 69.6015, GNorm = 0.9564, lr_0 = 2.0506e-04
Loss = 4.3253e-02, PNorm = 69.6049, GNorm = 0.5411, lr_0 = 2.0492e-04
Loss = 4.3369e-02, PNorm = 69.6088, GNorm = 0.5121, lr_0 = 2.0478e-04
Loss = 4.6810e-02, PNorm = 69.6125, GNorm = 0.9419, lr_0 = 2.0464e-04
Loss = 4.4819e-02, PNorm = 69.6148, GNorm = 0.4052, lr_0 = 2.0450e-04
Loss = 4.2744e-02, PNorm = 69.6169, GNorm = 0.4585, lr_0 = 2.0436e-04
Loss = 4.8247e-02, PNorm = 69.6209, GNorm = 0.7876, lr_0 = 2.0422e-04
Loss = 4.4177e-02, PNorm = 69.6254, GNorm = 0.6196, lr_0 = 2.0408e-04
Loss = 4.5921e-02, PNorm = 69.6285, GNorm = 0.7723, lr_0 = 2.0394e-04
Loss = 4.1493e-02, PNorm = 69.6304, GNorm = 0.4736, lr_0 = 2.0380e-04
Loss = 4.8719e-02, PNorm = 69.6334, GNorm = 0.7323, lr_0 = 2.0366e-04
Loss = 4.1922e-02, PNorm = 69.6375, GNorm = 0.3696, lr_0 = 2.0352e-04
Loss = 4.0887e-02, PNorm = 69.6409, GNorm = 0.5250, lr_0 = 2.0338e-04
Loss = 4.2493e-02, PNorm = 69.6435, GNorm = 0.5331, lr_0 = 2.0324e-04
Loss = 4.3799e-02, PNorm = 69.6464, GNorm = 0.4910, lr_0 = 2.0310e-04
Loss = 4.5946e-02, PNorm = 69.6488, GNorm = 0.6024, lr_0 = 2.0296e-04
Loss = 4.3267e-02, PNorm = 69.6529, GNorm = 0.3697, lr_0 = 2.0282e-04
Loss = 4.3744e-02, PNorm = 69.6564, GNorm = 0.8353, lr_0 = 2.0268e-04
Loss = 4.6072e-02, PNorm = 69.6596, GNorm = 0.5720, lr_0 = 2.0254e-04
Loss = 3.8531e-02, PNorm = 69.6628, GNorm = 0.3274, lr_0 = 2.0240e-04
Loss = 4.4503e-02, PNorm = 69.6653, GNorm = 0.5067, lr_0 = 2.0227e-04
Loss = 4.2798e-02, PNorm = 69.6674, GNorm = 0.4266, lr_0 = 2.0213e-04
Loss = 4.3242e-02, PNorm = 69.6699, GNorm = 0.7530, lr_0 = 2.0199e-04
Loss = 5.0319e-02, PNorm = 69.6716, GNorm = 0.5180, lr_0 = 2.0185e-04
Loss = 3.7958e-02, PNorm = 69.6738, GNorm = 0.8440, lr_0 = 2.0171e-04
Loss = 4.5202e-02, PNorm = 69.6768, GNorm = 0.6142, lr_0 = 2.0157e-04
Loss = 4.5422e-02, PNorm = 69.6791, GNorm = 0.6307, lr_0 = 2.0144e-04
Loss = 4.4048e-02, PNorm = 69.6815, GNorm = 0.3941, lr_0 = 2.0130e-04
Loss = 4.6010e-02, PNorm = 69.6857, GNorm = 0.5623, lr_0 = 2.0116e-04
Loss = 3.8508e-02, PNorm = 69.6882, GNorm = 0.4167, lr_0 = 2.0102e-04
Loss = 4.5218e-02, PNorm = 69.6907, GNorm = 0.3528, lr_0 = 2.0088e-04
Loss = 3.4817e-02, PNorm = 69.6933, GNorm = 0.3969, lr_0 = 2.0075e-04
Loss = 4.6739e-02, PNorm = 69.6971, GNorm = 0.4501, lr_0 = 2.0061e-04
Loss = 4.1039e-02, PNorm = 69.6995, GNorm = 0.4419, lr_0 = 2.0047e-04
Loss = 4.2412e-02, PNorm = 69.7030, GNorm = 0.5490, lr_0 = 2.0033e-04
Loss = 4.4974e-02, PNorm = 69.7063, GNorm = 0.5613, lr_0 = 2.0020e-04
Loss = 4.7603e-02, PNorm = 69.7101, GNorm = 0.5022, lr_0 = 2.0006e-04
Loss = 4.4631e-02, PNorm = 69.7134, GNorm = 0.4191, lr_0 = 1.9992e-04
Loss = 4.4244e-02, PNorm = 69.7172, GNorm = 0.4842, lr_0 = 1.9979e-04
Loss = 4.8057e-02, PNorm = 69.7197, GNorm = 0.6796, lr_0 = 1.9965e-04
Loss = 4.3838e-02, PNorm = 69.7218, GNorm = 0.4326, lr_0 = 1.9951e-04
Loss = 4.6012e-02, PNorm = 69.7255, GNorm = 0.4981, lr_0 = 1.9938e-04
Loss = 4.0188e-02, PNorm = 69.7282, GNorm = 0.6724, lr_0 = 1.9924e-04
Loss = 5.0766e-02, PNorm = 69.7311, GNorm = 0.5751, lr_0 = 1.9910e-04
Loss = 4.0806e-02, PNorm = 69.7340, GNorm = 0.3891, lr_0 = 1.9897e-04
Loss = 4.4513e-02, PNorm = 69.7358, GNorm = 0.3415, lr_0 = 1.9883e-04
Loss = 3.6811e-02, PNorm = 69.7388, GNorm = 0.6953, lr_0 = 1.9869e-04
Loss = 4.8282e-02, PNorm = 69.7430, GNorm = 0.6671, lr_0 = 1.9856e-04
Loss = 4.2240e-02, PNorm = 69.7466, GNorm = 0.4391, lr_0 = 1.9842e-04
Loss = 4.7093e-02, PNorm = 69.7500, GNorm = 0.4421, lr_0 = 1.9829e-04
Loss = 4.3954e-02, PNorm = 69.7537, GNorm = 0.6732, lr_0 = 1.9815e-04
Loss = 4.8732e-02, PNorm = 69.7574, GNorm = 0.6172, lr_0 = 1.9801e-04
Loss = 4.8663e-02, PNorm = 69.7620, GNorm = 0.5090, lr_0 = 1.9788e-04
Loss = 5.3950e-02, PNorm = 69.7666, GNorm = 0.4152, lr_0 = 1.9774e-04
Loss = 4.5130e-02, PNorm = 69.7706, GNorm = 0.6699, lr_0 = 1.9761e-04
Loss = 4.9780e-02, PNorm = 69.7739, GNorm = 0.6185, lr_0 = 1.9747e-04
Loss = 4.2838e-02, PNorm = 69.7779, GNorm = 0.5477, lr_0 = 1.9734e-04
Loss = 4.0453e-02, PNorm = 69.7800, GNorm = 0.4536, lr_0 = 1.9720e-04
Loss = 5.1395e-02, PNorm = 69.7833, GNorm = 0.7448, lr_0 = 1.9707e-04
Loss = 4.2343e-02, PNorm = 69.7858, GNorm = 0.5264, lr_0 = 1.9693e-04
Loss = 4.0954e-02, PNorm = 69.7884, GNorm = 0.4570, lr_0 = 1.9680e-04
Loss = 3.9279e-02, PNorm = 69.7930, GNorm = 0.3754, lr_0 = 1.9666e-04
Loss = 4.9400e-02, PNorm = 69.7959, GNorm = 0.7222, lr_0 = 1.9653e-04
Loss = 4.4022e-02, PNorm = 69.7964, GNorm = 0.4058, lr_0 = 1.9639e-04
Loss = 5.7609e-02, PNorm = 69.7988, GNorm = 0.6468, lr_0 = 1.9626e-04
Loss = 4.6362e-02, PNorm = 69.8038, GNorm = 0.5918, lr_0 = 1.9612e-04
Loss = 4.3220e-02, PNorm = 69.8084, GNorm = 0.5829, lr_0 = 1.9599e-04
Loss = 4.7348e-02, PNorm = 69.8107, GNorm = 0.5362, lr_0 = 1.9585e-04
Loss = 5.4779e-02, PNorm = 69.8147, GNorm = 0.4777, lr_0 = 1.9572e-04
Loss = 4.6872e-02, PNorm = 69.8182, GNorm = 0.4413, lr_0 = 1.9559e-04
Loss = 4.7397e-02, PNorm = 69.8211, GNorm = 0.4791, lr_0 = 1.9545e-04
Loss = 4.7050e-02, PNorm = 69.8252, GNorm = 1.1061, lr_0 = 1.9532e-04
Loss = 4.9582e-02, PNorm = 69.8303, GNorm = 0.4700, lr_0 = 1.9518e-04
Loss = 4.7678e-02, PNorm = 69.8339, GNorm = 0.6425, lr_0 = 1.9505e-04
Loss = 4.6996e-02, PNorm = 69.8364, GNorm = 0.5057, lr_0 = 1.9492e-04
Loss = 4.7318e-02, PNorm = 69.8396, GNorm = 0.4413, lr_0 = 1.9478e-04
Loss = 3.9767e-02, PNorm = 69.8439, GNorm = 0.5657, lr_0 = 1.9465e-04
Loss = 4.8809e-02, PNorm = 69.8471, GNorm = 0.5376, lr_0 = 1.9452e-04
Loss = 4.0523e-02, PNorm = 69.8513, GNorm = 0.4690, lr_0 = 1.9438e-04
Loss = 3.9961e-02, PNorm = 69.8555, GNorm = 0.5998, lr_0 = 1.9425e-04
Loss = 3.7604e-02, PNorm = 69.8591, GNorm = 0.5471, lr_0 = 1.9412e-04
Loss = 4.6590e-02, PNorm = 69.8615, GNorm = 0.4646, lr_0 = 1.9398e-04
Loss = 5.0828e-02, PNorm = 69.8633, GNorm = 0.5254, lr_0 = 1.9385e-04
Loss = 4.7936e-02, PNorm = 69.8650, GNorm = 0.6264, lr_0 = 1.9372e-04
Loss = 4.9516e-02, PNorm = 69.8675, GNorm = 0.5475, lr_0 = 1.9359e-04
Loss = 5.3160e-02, PNorm = 69.8706, GNorm = 0.5625, lr_0 = 1.9345e-04
Loss = 4.5358e-02, PNorm = 69.8740, GNorm = 0.5015, lr_0 = 1.9332e-04
Loss = 4.6882e-02, PNorm = 69.8771, GNorm = 0.4757, lr_0 = 1.9319e-04
Loss = 4.1968e-02, PNorm = 69.8804, GNorm = 0.5476, lr_0 = 1.9306e-04
Validation mae = 0.387363
Epoch 22
Loss = 3.8771e-02, PNorm = 69.8838, GNorm = 0.4642, lr_0 = 1.9292e-04
Loss = 4.2473e-02, PNorm = 69.8874, GNorm = 0.4443, lr_0 = 1.9279e-04
Loss = 3.8006e-02, PNorm = 69.8910, GNorm = 0.4217, lr_0 = 1.9266e-04
Loss = 3.9924e-02, PNorm = 69.8931, GNorm = 0.4068, lr_0 = 1.9253e-04
Loss = 3.6123e-02, PNorm = 69.8964, GNorm = 0.3913, lr_0 = 1.9240e-04
Loss = 3.3185e-02, PNorm = 69.8994, GNorm = 0.6097, lr_0 = 1.9226e-04
Loss = 3.8226e-02, PNorm = 69.9026, GNorm = 0.3969, lr_0 = 1.9213e-04
Loss = 3.3095e-02, PNorm = 69.9054, GNorm = 0.4940, lr_0 = 1.9200e-04
Loss = 3.5804e-02, PNorm = 69.9067, GNorm = 0.7968, lr_0 = 1.9187e-04
Loss = 4.0715e-02, PNorm = 69.9089, GNorm = 0.8082, lr_0 = 1.9174e-04
Loss = 3.5975e-02, PNorm = 69.9126, GNorm = 0.4029, lr_0 = 1.9161e-04
Loss = 4.1024e-02, PNorm = 69.9141, GNorm = 0.7034, lr_0 = 1.9148e-04
Loss = 5.1608e-02, PNorm = 69.9175, GNorm = 0.4815, lr_0 = 1.9134e-04
Loss = 3.8835e-02, PNorm = 69.9211, GNorm = 0.4026, lr_0 = 1.9121e-04
Loss = 3.9767e-02, PNorm = 69.9252, GNorm = 0.4186, lr_0 = 1.9108e-04
Loss = 4.2532e-02, PNorm = 69.9294, GNorm = 0.4558, lr_0 = 1.9095e-04
Loss = 3.3655e-02, PNorm = 69.9334, GNorm = 0.6328, lr_0 = 1.9082e-04
Loss = 4.5250e-02, PNorm = 69.9365, GNorm = 0.4966, lr_0 = 1.9069e-04
Loss = 4.8609e-02, PNorm = 69.9405, GNorm = 0.5789, lr_0 = 1.9056e-04
Loss = 3.8171e-02, PNorm = 69.9421, GNorm = 0.3424, lr_0 = 1.9043e-04
Loss = 4.4890e-02, PNorm = 69.9440, GNorm = 0.5852, lr_0 = 1.9030e-04
Loss = 3.3705e-02, PNorm = 69.9463, GNorm = 0.4060, lr_0 = 1.9017e-04
Loss = 4.7867e-02, PNorm = 69.9495, GNorm = 0.4901, lr_0 = 1.9004e-04
Loss = 3.6836e-02, PNorm = 69.9525, GNorm = 0.4530, lr_0 = 1.8991e-04
Loss = 3.8362e-02, PNorm = 69.9560, GNorm = 0.3195, lr_0 = 1.8978e-04
Loss = 4.4534e-02, PNorm = 69.9600, GNorm = 0.4620, lr_0 = 1.8965e-04
Loss = 3.9536e-02, PNorm = 69.9636, GNorm = 0.4602, lr_0 = 1.8952e-04
Loss = 3.9906e-02, PNorm = 69.9664, GNorm = 0.4466, lr_0 = 1.8939e-04
Loss = 3.3944e-02, PNorm = 69.9678, GNorm = 0.6016, lr_0 = 1.8926e-04
Loss = 3.7740e-02, PNorm = 69.9703, GNorm = 0.4570, lr_0 = 1.8913e-04
Loss = 4.5683e-02, PNorm = 69.9730, GNorm = 0.4275, lr_0 = 1.8900e-04
Loss = 3.8573e-02, PNorm = 69.9764, GNorm = 0.5946, lr_0 = 1.8887e-04
Loss = 3.6462e-02, PNorm = 69.9791, GNorm = 0.4833, lr_0 = 1.8874e-04
Loss = 4.4059e-02, PNorm = 69.9838, GNorm = 0.6538, lr_0 = 1.8861e-04
Loss = 4.4929e-02, PNorm = 69.9875, GNorm = 0.6045, lr_0 = 1.8848e-04
Loss = 4.1767e-02, PNorm = 69.9915, GNorm = 0.5137, lr_0 = 1.8835e-04
Loss = 4.3763e-02, PNorm = 69.9945, GNorm = 0.4955, lr_0 = 1.8822e-04
Loss = 4.3064e-02, PNorm = 69.9956, GNorm = 0.6522, lr_0 = 1.8809e-04
Loss = 4.4190e-02, PNorm = 69.9971, GNorm = 0.5021, lr_0 = 1.8797e-04
Loss = 5.0139e-02, PNorm = 70.0011, GNorm = 0.7052, lr_0 = 1.8784e-04
Loss = 3.3767e-02, PNorm = 70.0059, GNorm = 0.4860, lr_0 = 1.8771e-04
Loss = 4.5258e-02, PNorm = 70.0091, GNorm = 0.4332, lr_0 = 1.8758e-04
Loss = 4.5405e-02, PNorm = 70.0106, GNorm = 0.5142, lr_0 = 1.8745e-04
Loss = 4.1535e-02, PNorm = 70.0117, GNorm = 0.4788, lr_0 = 1.8732e-04
Loss = 4.2163e-02, PNorm = 70.0150, GNorm = 0.5948, lr_0 = 1.8719e-04
Loss = 3.6285e-02, PNorm = 70.0192, GNorm = 0.5088, lr_0 = 1.8707e-04
Loss = 4.2770e-02, PNorm = 70.0227, GNorm = 0.8669, lr_0 = 1.8694e-04
Loss = 3.4662e-02, PNorm = 70.0256, GNorm = 0.4394, lr_0 = 1.8681e-04
Loss = 5.2260e-02, PNorm = 70.0283, GNorm = 0.5058, lr_0 = 1.8668e-04
Loss = 4.4890e-02, PNorm = 70.0308, GNorm = 0.6410, lr_0 = 1.8655e-04
Loss = 4.5475e-02, PNorm = 70.0350, GNorm = 0.5214, lr_0 = 1.8643e-04
Loss = 4.7655e-02, PNorm = 70.0385, GNorm = 0.4659, lr_0 = 1.8630e-04
Loss = 4.4882e-02, PNorm = 70.0423, GNorm = 0.5154, lr_0 = 1.8617e-04
Loss = 4.7296e-02, PNorm = 70.0450, GNorm = 0.4620, lr_0 = 1.8604e-04
Loss = 3.7975e-02, PNorm = 70.0478, GNorm = 0.4301, lr_0 = 1.8592e-04
Loss = 4.5307e-02, PNorm = 70.0511, GNorm = 0.4885, lr_0 = 1.8579e-04
Loss = 3.6219e-02, PNorm = 70.0543, GNorm = 0.4458, lr_0 = 1.8566e-04
Loss = 4.2129e-02, PNorm = 70.0571, GNorm = 0.4921, lr_0 = 1.8553e-04
Loss = 5.1679e-02, PNorm = 70.0608, GNorm = 0.6408, lr_0 = 1.8541e-04
Loss = 4.4922e-02, PNorm = 70.0636, GNorm = 0.4897, lr_0 = 1.8528e-04
Loss = 4.2617e-02, PNorm = 70.0660, GNorm = 0.3891, lr_0 = 1.8515e-04
Loss = 4.2028e-02, PNorm = 70.0681, GNorm = 0.8846, lr_0 = 1.8503e-04
Loss = 4.5956e-02, PNorm = 70.0681, GNorm = 0.4469, lr_0 = 1.8490e-04
Loss = 3.8160e-02, PNorm = 70.0700, GNorm = 0.4730, lr_0 = 1.8477e-04
Loss = 3.9522e-02, PNorm = 70.0725, GNorm = 0.4616, lr_0 = 1.8465e-04
Loss = 4.6769e-02, PNorm = 70.0763, GNorm = 0.3665, lr_0 = 1.8452e-04
Loss = 4.8808e-02, PNorm = 70.0780, GNorm = 0.5065, lr_0 = 1.8439e-04
Loss = 5.5864e-02, PNorm = 70.0803, GNorm = 0.7590, lr_0 = 1.8427e-04
Loss = 3.8543e-02, PNorm = 70.0828, GNorm = 0.3998, lr_0 = 1.8414e-04
Loss = 4.8579e-02, PNorm = 70.0857, GNorm = 0.5953, lr_0 = 1.8401e-04
Loss = 4.2429e-02, PNorm = 70.0887, GNorm = 0.5414, lr_0 = 1.8389e-04
Loss = 3.8832e-02, PNorm = 70.0921, GNorm = 0.6260, lr_0 = 1.8376e-04
Loss = 5.0174e-02, PNorm = 70.0955, GNorm = 0.5668, lr_0 = 1.8364e-04
Loss = 4.8475e-02, PNorm = 70.0991, GNorm = 0.4555, lr_0 = 1.8351e-04
Loss = 3.9347e-02, PNorm = 70.1024, GNorm = 0.5947, lr_0 = 1.8338e-04
Loss = 4.0940e-02, PNorm = 70.1048, GNorm = 0.6188, lr_0 = 1.8326e-04
Loss = 4.1774e-02, PNorm = 70.1075, GNorm = 0.7251, lr_0 = 1.8313e-04
Loss = 4.3738e-02, PNorm = 70.1104, GNorm = 0.6781, lr_0 = 1.8301e-04
Loss = 4.0017e-02, PNorm = 70.1131, GNorm = 0.5015, lr_0 = 1.8288e-04
Loss = 4.1143e-02, PNorm = 70.1155, GNorm = 0.4780, lr_0 = 1.8276e-04
Loss = 3.7079e-02, PNorm = 70.1171, GNorm = 0.4775, lr_0 = 1.8263e-04
Loss = 5.1153e-02, PNorm = 70.1186, GNorm = 0.6181, lr_0 = 1.8251e-04
Loss = 4.6276e-02, PNorm = 70.1219, GNorm = 0.6844, lr_0 = 1.8238e-04
Loss = 3.8424e-02, PNorm = 70.1257, GNorm = 0.4487, lr_0 = 1.8226e-04
Loss = 4.7027e-02, PNorm = 70.1286, GNorm = 0.5013, lr_0 = 1.8213e-04
Loss = 5.0556e-02, PNorm = 70.1320, GNorm = 0.5760, lr_0 = 1.8201e-04
Loss = 4.4867e-02, PNorm = 70.1340, GNorm = 0.8180, lr_0 = 1.8188e-04
Loss = 4.1894e-02, PNorm = 70.1381, GNorm = 0.5267, lr_0 = 1.8176e-04
Loss = 4.0902e-02, PNorm = 70.1420, GNorm = 0.5984, lr_0 = 1.8163e-04
Loss = 4.7427e-02, PNorm = 70.1438, GNorm = 0.4456, lr_0 = 1.8151e-04
Loss = 4.2374e-02, PNorm = 70.1476, GNorm = 0.5267, lr_0 = 1.8138e-04
Loss = 4.3885e-02, PNorm = 70.1517, GNorm = 0.6755, lr_0 = 1.8126e-04
Loss = 3.9888e-02, PNorm = 70.1544, GNorm = 0.3895, lr_0 = 1.8114e-04
Loss = 4.4165e-02, PNorm = 70.1574, GNorm = 0.4916, lr_0 = 1.8101e-04
Loss = 4.4491e-02, PNorm = 70.1605, GNorm = 0.8776, lr_0 = 1.8089e-04
Loss = 3.5975e-02, PNorm = 70.1640, GNorm = 0.4943, lr_0 = 1.8076e-04
Loss = 4.0692e-02, PNorm = 70.1661, GNorm = 0.5672, lr_0 = 1.8064e-04
Loss = 4.9472e-02, PNorm = 70.1681, GNorm = 0.4456, lr_0 = 1.8052e-04
Loss = 4.5417e-02, PNorm = 70.1696, GNorm = 0.6073, lr_0 = 1.8039e-04
Loss = 3.8613e-02, PNorm = 70.1736, GNorm = 0.5192, lr_0 = 1.8027e-04
Loss = 3.9703e-02, PNorm = 70.1752, GNorm = 0.5036, lr_0 = 1.8015e-04
Loss = 3.7855e-02, PNorm = 70.1771, GNorm = 0.5271, lr_0 = 1.8002e-04
Loss = 3.3873e-02, PNorm = 70.1784, GNorm = 0.6994, lr_0 = 1.7990e-04
Loss = 4.1613e-02, PNorm = 70.1802, GNorm = 0.4115, lr_0 = 1.7978e-04
Loss = 4.0049e-02, PNorm = 70.1820, GNorm = 0.4677, lr_0 = 1.7965e-04
Loss = 4.6364e-02, PNorm = 70.1847, GNorm = 0.8235, lr_0 = 1.7953e-04
Loss = 5.1723e-02, PNorm = 70.1884, GNorm = 0.8369, lr_0 = 1.7941e-04
Loss = 4.3146e-02, PNorm = 70.1925, GNorm = 0.4999, lr_0 = 1.7928e-04
Loss = 4.3572e-02, PNorm = 70.1950, GNorm = 0.4501, lr_0 = 1.7916e-04
Loss = 4.5236e-02, PNorm = 70.1994, GNorm = 0.8099, lr_0 = 1.7904e-04
Loss = 4.8275e-02, PNorm = 70.2045, GNorm = 0.6356, lr_0 = 1.7892e-04
Loss = 4.4040e-02, PNorm = 70.2093, GNorm = 0.6088, lr_0 = 1.7879e-04
Loss = 4.8813e-02, PNorm = 70.2118, GNorm = 0.5438, lr_0 = 1.7867e-04
Loss = 4.2985e-02, PNorm = 70.2139, GNorm = 0.5328, lr_0 = 1.7855e-04
Loss = 4.1747e-02, PNorm = 70.2156, GNorm = 0.4463, lr_0 = 1.7843e-04
Loss = 5.1407e-02, PNorm = 70.2176, GNorm = 0.5566, lr_0 = 1.7830e-04
Loss = 4.4890e-02, PNorm = 70.2199, GNorm = 0.3780, lr_0 = 1.7818e-04
Loss = 4.1830e-02, PNorm = 70.2216, GNorm = 0.5960, lr_0 = 1.7806e-04
Loss = 4.5637e-02, PNorm = 70.2241, GNorm = 0.5857, lr_0 = 1.7794e-04
Loss = 4.6617e-02, PNorm = 70.2281, GNorm = 0.5056, lr_0 = 1.7782e-04
Validation mae = 0.388319
Epoch 23
Loss = 3.6724e-02, PNorm = 70.2306, GNorm = 0.5310, lr_0 = 1.7769e-04
Loss = 3.8993e-02, PNorm = 70.2346, GNorm = 0.5236, lr_0 = 1.7757e-04
Loss = 3.3526e-02, PNorm = 70.2376, GNorm = 0.4572, lr_0 = 1.7745e-04
Loss = 3.3546e-02, PNorm = 70.2427, GNorm = 0.5496, lr_0 = 1.7733e-04
Loss = 3.8664e-02, PNorm = 70.2459, GNorm = 0.3433, lr_0 = 1.7721e-04
Loss = 3.5624e-02, PNorm = 70.2485, GNorm = 0.5396, lr_0 = 1.7709e-04
Loss = 3.6146e-02, PNorm = 70.2498, GNorm = 0.4351, lr_0 = 1.7696e-04
Loss = 3.7510e-02, PNorm = 70.2532, GNorm = 0.4388, lr_0 = 1.7684e-04
Loss = 3.7612e-02, PNorm = 70.2561, GNorm = 0.4820, lr_0 = 1.7672e-04
Loss = 3.7678e-02, PNorm = 70.2592, GNorm = 0.4491, lr_0 = 1.7660e-04
Loss = 4.0685e-02, PNorm = 70.2606, GNorm = 0.4907, lr_0 = 1.7648e-04
Loss = 4.1436e-02, PNorm = 70.2626, GNorm = 0.5528, lr_0 = 1.7636e-04
Loss = 4.1991e-02, PNorm = 70.2645, GNorm = 0.4813, lr_0 = 1.7624e-04
Loss = 3.9656e-02, PNorm = 70.2675, GNorm = 0.4169, lr_0 = 1.7612e-04
Loss = 3.7881e-02, PNorm = 70.2703, GNorm = 0.3343, lr_0 = 1.7600e-04
Loss = 3.4958e-02, PNorm = 70.2733, GNorm = 0.5140, lr_0 = 1.7588e-04
Loss = 4.2970e-02, PNorm = 70.2760, GNorm = 0.4846, lr_0 = 1.7576e-04
Loss = 3.7467e-02, PNorm = 70.2778, GNorm = 0.6009, lr_0 = 1.7564e-04
Loss = 4.3211e-02, PNorm = 70.2815, GNorm = 0.4441, lr_0 = 1.7552e-04
Loss = 3.6389e-02, PNorm = 70.2849, GNorm = 0.4943, lr_0 = 1.7540e-04
Loss = 4.2895e-02, PNorm = 70.2887, GNorm = 0.5635, lr_0 = 1.7528e-04
Loss = 3.8399e-02, PNorm = 70.2923, GNorm = 0.5888, lr_0 = 1.7516e-04
Loss = 3.5792e-02, PNorm = 70.2950, GNorm = 0.5080, lr_0 = 1.7504e-04
Loss = 4.2901e-02, PNorm = 70.2962, GNorm = 0.5165, lr_0 = 1.7492e-04
Loss = 4.3304e-02, PNorm = 70.2981, GNorm = 0.4445, lr_0 = 1.7480e-04
Loss = 4.0460e-02, PNorm = 70.3013, GNorm = 0.8308, lr_0 = 1.7468e-04
Loss = 3.8223e-02, PNorm = 70.3047, GNorm = 0.5340, lr_0 = 1.7456e-04
Loss = 3.9095e-02, PNorm = 70.3090, GNorm = 0.3619, lr_0 = 1.7444e-04
Loss = 4.3429e-02, PNorm = 70.3126, GNorm = 0.4487, lr_0 = 1.7432e-04
Loss = 3.7278e-02, PNorm = 70.3149, GNorm = 0.5682, lr_0 = 1.7420e-04
Loss = 3.9048e-02, PNorm = 70.3178, GNorm = 0.5448, lr_0 = 1.7408e-04
Loss = 3.6492e-02, PNorm = 70.3200, GNorm = 0.4838, lr_0 = 1.7396e-04
Loss = 3.6590e-02, PNorm = 70.3211, GNorm = 0.4936, lr_0 = 1.7384e-04
Loss = 3.5465e-02, PNorm = 70.3224, GNorm = 0.4441, lr_0 = 1.7372e-04
Loss = 4.0935e-02, PNorm = 70.3245, GNorm = 0.5005, lr_0 = 1.7360e-04
Loss = 4.1869e-02, PNorm = 70.3279, GNorm = 0.6733, lr_0 = 1.7348e-04
Loss = 3.9646e-02, PNorm = 70.3319, GNorm = 0.5621, lr_0 = 1.7336e-04
Loss = 4.0689e-02, PNorm = 70.3354, GNorm = 1.1235, lr_0 = 1.7325e-04
Loss = 4.1509e-02, PNorm = 70.3386, GNorm = 0.4887, lr_0 = 1.7313e-04
Loss = 4.1452e-02, PNorm = 70.3408, GNorm = 0.5315, lr_0 = 1.7301e-04
Loss = 4.2391e-02, PNorm = 70.3417, GNorm = 0.6993, lr_0 = 1.7289e-04
Loss = 4.1100e-02, PNorm = 70.3442, GNorm = 0.4456, lr_0 = 1.7277e-04
Loss = 4.0994e-02, PNorm = 70.3470, GNorm = 0.5413, lr_0 = 1.7265e-04
Loss = 3.8589e-02, PNorm = 70.3491, GNorm = 0.8195, lr_0 = 1.7253e-04
Loss = 3.6671e-02, PNorm = 70.3510, GNorm = 0.4307, lr_0 = 1.7242e-04
Loss = 3.8698e-02, PNorm = 70.3548, GNorm = 0.5494, lr_0 = 1.7230e-04
Loss = 4.2546e-02, PNorm = 70.3575, GNorm = 0.5108, lr_0 = 1.7218e-04
Loss = 4.1172e-02, PNorm = 70.3595, GNorm = 0.4561, lr_0 = 1.7206e-04
Loss = 3.8470e-02, PNorm = 70.3620, GNorm = 0.4327, lr_0 = 1.7194e-04
Loss = 3.9183e-02, PNorm = 70.3641, GNorm = 0.4714, lr_0 = 1.7183e-04
Loss = 3.3549e-02, PNorm = 70.3668, GNorm = 0.5257, lr_0 = 1.7171e-04
Loss = 4.3247e-02, PNorm = 70.3696, GNorm = 0.2963, lr_0 = 1.7159e-04
Loss = 3.8170e-02, PNorm = 70.3722, GNorm = 0.6529, lr_0 = 1.7147e-04
Loss = 4.1155e-02, PNorm = 70.3757, GNorm = 0.4854, lr_0 = 1.7136e-04
Loss = 3.5722e-02, PNorm = 70.3791, GNorm = 0.5230, lr_0 = 1.7124e-04
Loss = 4.2384e-02, PNorm = 70.3800, GNorm = 0.5490, lr_0 = 1.7112e-04
Loss = 4.8847e-02, PNorm = 70.3823, GNorm = 0.6208, lr_0 = 1.7100e-04
Loss = 3.5964e-02, PNorm = 70.3844, GNorm = 0.3726, lr_0 = 1.7089e-04
Loss = 3.9731e-02, PNorm = 70.3863, GNorm = 0.4012, lr_0 = 1.7077e-04
Loss = 3.9490e-02, PNorm = 70.3890, GNorm = 0.4738, lr_0 = 1.7065e-04
Loss = 4.1611e-02, PNorm = 70.3910, GNorm = 0.5439, lr_0 = 1.7054e-04
Loss = 4.1771e-02, PNorm = 70.3934, GNorm = 0.4071, lr_0 = 1.7042e-04
Loss = 4.3213e-02, PNorm = 70.3940, GNorm = 0.5643, lr_0 = 1.7030e-04
Loss = 4.6454e-02, PNorm = 70.3977, GNorm = 0.5221, lr_0 = 1.7019e-04
Loss = 4.0839e-02, PNorm = 70.3997, GNorm = 0.6984, lr_0 = 1.7007e-04
Loss = 4.0178e-02, PNorm = 70.4023, GNorm = 0.4609, lr_0 = 1.6995e-04
Loss = 3.8696e-02, PNorm = 70.4058, GNorm = 0.3826, lr_0 = 1.6984e-04
Loss = 4.0924e-02, PNorm = 70.4076, GNorm = 0.7858, lr_0 = 1.6972e-04
Loss = 3.7824e-02, PNorm = 70.4091, GNorm = 0.7100, lr_0 = 1.6960e-04
Loss = 4.1308e-02, PNorm = 70.4105, GNorm = 0.4999, lr_0 = 1.6949e-04
Loss = 4.0347e-02, PNorm = 70.4120, GNorm = 0.6113, lr_0 = 1.6937e-04
Loss = 3.8413e-02, PNorm = 70.4157, GNorm = 0.4523, lr_0 = 1.6926e-04
Loss = 4.3231e-02, PNorm = 70.4198, GNorm = 0.5075, lr_0 = 1.6914e-04
Loss = 4.2712e-02, PNorm = 70.4232, GNorm = 0.4274, lr_0 = 1.6902e-04
Loss = 4.2743e-02, PNorm = 70.4254, GNorm = 0.5870, lr_0 = 1.6891e-04
Loss = 3.6160e-02, PNorm = 70.4289, GNorm = 0.3544, lr_0 = 1.6879e-04
Loss = 3.8157e-02, PNorm = 70.4314, GNorm = 0.3357, lr_0 = 1.6868e-04
Loss = 3.8831e-02, PNorm = 70.4337, GNorm = 0.6078, lr_0 = 1.6856e-04
Loss = 3.7128e-02, PNorm = 70.4363, GNorm = 0.3991, lr_0 = 1.6845e-04
Loss = 3.3437e-02, PNorm = 70.4376, GNorm = 0.4508, lr_0 = 1.6833e-04
Loss = 4.4015e-02, PNorm = 70.4380, GNorm = 0.6151, lr_0 = 1.6821e-04
Loss = 4.2456e-02, PNorm = 70.4393, GNorm = 0.4398, lr_0 = 1.6810e-04
Loss = 4.4572e-02, PNorm = 70.4411, GNorm = 0.4439, lr_0 = 1.6798e-04
Loss = 4.4169e-02, PNorm = 70.4432, GNorm = 0.4952, lr_0 = 1.6787e-04
Loss = 4.3288e-02, PNorm = 70.4460, GNorm = 0.4686, lr_0 = 1.6775e-04
Loss = 3.7931e-02, PNorm = 70.4480, GNorm = 0.4279, lr_0 = 1.6764e-04
Loss = 3.7091e-02, PNorm = 70.4516, GNorm = 0.6443, lr_0 = 1.6752e-04
Loss = 3.7734e-02, PNorm = 70.4555, GNorm = 0.4308, lr_0 = 1.6741e-04
Loss = 4.2496e-02, PNorm = 70.4579, GNorm = 0.6567, lr_0 = 1.6729e-04
Loss = 4.6093e-02, PNorm = 70.4602, GNorm = 0.7069, lr_0 = 1.6718e-04
Loss = 4.0213e-02, PNorm = 70.4619, GNorm = 0.3695, lr_0 = 1.6707e-04
Loss = 3.8145e-02, PNorm = 70.4650, GNorm = 0.5455, lr_0 = 1.6695e-04
Loss = 3.7742e-02, PNorm = 70.4677, GNorm = 0.5025, lr_0 = 1.6684e-04
Loss = 3.8025e-02, PNorm = 70.4696, GNorm = 0.3939, lr_0 = 1.6672e-04
Loss = 3.9558e-02, PNorm = 70.4714, GNorm = 0.4141, lr_0 = 1.6661e-04
Loss = 4.1038e-02, PNorm = 70.4737, GNorm = 0.4890, lr_0 = 1.6649e-04
Loss = 3.8889e-02, PNorm = 70.4749, GNorm = 0.4856, lr_0 = 1.6638e-04
Loss = 4.1752e-02, PNorm = 70.4776, GNorm = 0.4436, lr_0 = 1.6627e-04
Loss = 4.2700e-02, PNorm = 70.4799, GNorm = 0.5499, lr_0 = 1.6615e-04
Loss = 3.9328e-02, PNorm = 70.4833, GNorm = 0.6760, lr_0 = 1.6604e-04
Loss = 3.5673e-02, PNorm = 70.4857, GNorm = 0.4576, lr_0 = 1.6592e-04
Loss = 4.7025e-02, PNorm = 70.4885, GNorm = 0.6511, lr_0 = 1.6581e-04
Loss = 3.8323e-02, PNorm = 70.4907, GNorm = 0.6321, lr_0 = 1.6570e-04
Loss = 3.8979e-02, PNorm = 70.4918, GNorm = 0.6686, lr_0 = 1.6558e-04
Loss = 4.3966e-02, PNorm = 70.4936, GNorm = 0.6124, lr_0 = 1.6547e-04
Loss = 3.9251e-02, PNorm = 70.4958, GNorm = 0.4712, lr_0 = 1.6536e-04
Loss = 4.0225e-02, PNorm = 70.4974, GNorm = 0.4393, lr_0 = 1.6524e-04
Loss = 3.9696e-02, PNorm = 70.4996, GNorm = 0.4910, lr_0 = 1.6513e-04
Loss = 4.0311e-02, PNorm = 70.5007, GNorm = 0.3519, lr_0 = 1.6502e-04
Loss = 3.9033e-02, PNorm = 70.5034, GNorm = 0.5060, lr_0 = 1.6490e-04
Loss = 4.0828e-02, PNorm = 70.5044, GNorm = 0.4901, lr_0 = 1.6479e-04
Loss = 3.9166e-02, PNorm = 70.5084, GNorm = 0.4686, lr_0 = 1.6468e-04
Loss = 3.7640e-02, PNorm = 70.5111, GNorm = 0.4053, lr_0 = 1.6457e-04
Loss = 3.2248e-02, PNorm = 70.5133, GNorm = 0.3636, lr_0 = 1.6445e-04
Loss = 4.1136e-02, PNorm = 70.5159, GNorm = 0.5630, lr_0 = 1.6434e-04
Loss = 4.2679e-02, PNorm = 70.5185, GNorm = 0.6916, lr_0 = 1.6423e-04
Loss = 3.4623e-02, PNorm = 70.5221, GNorm = 0.4018, lr_0 = 1.6412e-04
Loss = 4.7341e-02, PNorm = 70.5255, GNorm = 0.4870, lr_0 = 1.6400e-04
Loss = 5.2559e-02, PNorm = 70.5289, GNorm = 0.5317, lr_0 = 1.6389e-04
Loss = 5.0565e-02, PNorm = 70.5317, GNorm = 0.4451, lr_0 = 1.6378e-04
Validation mae = 0.391725
Epoch 24
Loss = 3.2041e-02, PNorm = 70.5352, GNorm = 0.4396, lr_0 = 1.6367e-04
Loss = 3.0865e-02, PNorm = 70.5367, GNorm = 0.4710, lr_0 = 1.6355e-04
Loss = 3.8987e-02, PNorm = 70.5384, GNorm = 0.4967, lr_0 = 1.6344e-04
Loss = 3.3631e-02, PNorm = 70.5415, GNorm = 0.7081, lr_0 = 1.6333e-04
Loss = 4.5281e-02, PNorm = 70.5448, GNorm = 0.5914, lr_0 = 1.6322e-04
Loss = 3.6488e-02, PNorm = 70.5477, GNorm = 0.5334, lr_0 = 1.6311e-04
Loss = 3.1591e-02, PNorm = 70.5509, GNorm = 0.4908, lr_0 = 1.6299e-04
Loss = 3.2866e-02, PNorm = 70.5534, GNorm = 0.8209, lr_0 = 1.6288e-04
Loss = 3.9584e-02, PNorm = 70.5561, GNorm = 0.6128, lr_0 = 1.6277e-04
Loss = 3.9131e-02, PNorm = 70.5590, GNorm = 0.4598, lr_0 = 1.6266e-04
Loss = 3.6560e-02, PNorm = 70.5614, GNorm = 0.6447, lr_0 = 1.6255e-04
Loss = 3.7083e-02, PNorm = 70.5647, GNorm = 0.3344, lr_0 = 1.6244e-04
Loss = 3.6809e-02, PNorm = 70.5666, GNorm = 0.6386, lr_0 = 1.6233e-04
Loss = 4.5652e-02, PNorm = 70.5697, GNorm = 0.4929, lr_0 = 1.6221e-04
Loss = 3.5883e-02, PNorm = 70.5737, GNorm = 0.3862, lr_0 = 1.6210e-04
Loss = 3.3647e-02, PNorm = 70.5763, GNorm = 0.4343, lr_0 = 1.6199e-04
Loss = 4.1869e-02, PNorm = 70.5783, GNorm = 0.5469, lr_0 = 1.6188e-04
Loss = 3.2319e-02, PNorm = 70.5810, GNorm = 0.4931, lr_0 = 1.6177e-04
Loss = 4.0669e-02, PNorm = 70.5830, GNorm = 0.6420, lr_0 = 1.6166e-04
Loss = 3.3660e-02, PNorm = 70.5861, GNorm = 0.4377, lr_0 = 1.6155e-04
Loss = 3.2824e-02, PNorm = 70.5870, GNorm = 0.6527, lr_0 = 1.6144e-04
Loss = 3.4398e-02, PNorm = 70.5886, GNorm = 0.3364, lr_0 = 1.6133e-04
Loss = 3.5840e-02, PNorm = 70.5910, GNorm = 0.3722, lr_0 = 1.6122e-04
Loss = 4.0080e-02, PNorm = 70.5945, GNorm = 0.6375, lr_0 = 1.6111e-04
Loss = 3.4097e-02, PNorm = 70.5965, GNorm = 0.3543, lr_0 = 1.6100e-04
Loss = 3.9602e-02, PNorm = 70.5993, GNorm = 0.4926, lr_0 = 1.6089e-04
Loss = 3.7461e-02, PNorm = 70.6028, GNorm = 0.3809, lr_0 = 1.6078e-04
Loss = 3.7401e-02, PNorm = 70.6051, GNorm = 0.5708, lr_0 = 1.6067e-04
Loss = 3.7080e-02, PNorm = 70.6081, GNorm = 0.4247, lr_0 = 1.6056e-04
Loss = 3.5490e-02, PNorm = 70.6110, GNorm = 0.4911, lr_0 = 1.6045e-04
Loss = 3.7496e-02, PNorm = 70.6132, GNorm = 0.5809, lr_0 = 1.6034e-04
Loss = 3.6494e-02, PNorm = 70.6155, GNorm = 0.4508, lr_0 = 1.6023e-04
Loss = 3.8619e-02, PNorm = 70.6175, GNorm = 0.5646, lr_0 = 1.6012e-04
Loss = 3.4434e-02, PNorm = 70.6201, GNorm = 0.5518, lr_0 = 1.6001e-04
Loss = 3.9151e-02, PNorm = 70.6233, GNorm = 0.4950, lr_0 = 1.5990e-04
Loss = 3.1348e-02, PNorm = 70.6255, GNorm = 0.6226, lr_0 = 1.5979e-04
Loss = 3.8975e-02, PNorm = 70.6288, GNorm = 0.6047, lr_0 = 1.5968e-04
Loss = 3.5210e-02, PNorm = 70.6307, GNorm = 0.5942, lr_0 = 1.5957e-04
Loss = 3.4774e-02, PNorm = 70.6326, GNorm = 0.3926, lr_0 = 1.5946e-04
Loss = 4.5774e-02, PNorm = 70.6338, GNorm = 0.6098, lr_0 = 1.5935e-04
Loss = 3.0766e-02, PNorm = 70.6357, GNorm = 0.3454, lr_0 = 1.5924e-04
Loss = 3.8558e-02, PNorm = 70.6375, GNorm = 0.3878, lr_0 = 1.5913e-04
Loss = 3.6769e-02, PNorm = 70.6394, GNorm = 0.6031, lr_0 = 1.5902e-04
Loss = 3.6332e-02, PNorm = 70.6425, GNorm = 0.4572, lr_0 = 1.5891e-04
Loss = 4.0094e-02, PNorm = 70.6450, GNorm = 0.5330, lr_0 = 1.5880e-04
Loss = 4.0190e-02, PNorm = 70.6465, GNorm = 0.4731, lr_0 = 1.5870e-04
Loss = 3.4638e-02, PNorm = 70.6489, GNorm = 0.5858, lr_0 = 1.5859e-04
Loss = 4.0367e-02, PNorm = 70.6506, GNorm = 0.3578, lr_0 = 1.5848e-04
Loss = 3.0820e-02, PNorm = 70.6527, GNorm = 0.4243, lr_0 = 1.5837e-04
Loss = 3.6761e-02, PNorm = 70.6555, GNorm = 0.4739, lr_0 = 1.5826e-04
Loss = 3.7744e-02, PNorm = 70.6584, GNorm = 0.5316, lr_0 = 1.5815e-04
Loss = 3.8366e-02, PNorm = 70.6609, GNorm = 0.4472, lr_0 = 1.5804e-04
Loss = 3.8038e-02, PNorm = 70.6638, GNorm = 0.4519, lr_0 = 1.5794e-04
Loss = 4.0205e-02, PNorm = 70.6664, GNorm = 0.3655, lr_0 = 1.5783e-04
Loss = 4.5324e-02, PNorm = 70.6683, GNorm = 0.7124, lr_0 = 1.5772e-04
Loss = 4.1185e-02, PNorm = 70.6707, GNorm = 0.5693, lr_0 = 1.5761e-04
Loss = 3.8417e-02, PNorm = 70.6721, GNorm = 0.6003, lr_0 = 1.5750e-04
Loss = 4.4811e-02, PNorm = 70.6738, GNorm = 0.5952, lr_0 = 1.5740e-04
Loss = 3.8089e-02, PNorm = 70.6773, GNorm = 0.5843, lr_0 = 1.5729e-04
Loss = 3.9362e-02, PNorm = 70.6791, GNorm = 0.5249, lr_0 = 1.5718e-04
Loss = 3.9150e-02, PNorm = 70.6805, GNorm = 0.4691, lr_0 = 1.5707e-04
Loss = 3.6913e-02, PNorm = 70.6829, GNorm = 0.7137, lr_0 = 1.5697e-04
Loss = 3.9314e-02, PNorm = 70.6851, GNorm = 0.5385, lr_0 = 1.5686e-04
Loss = 3.6630e-02, PNorm = 70.6874, GNorm = 0.4661, lr_0 = 1.5675e-04
Loss = 4.3794e-02, PNorm = 70.6903, GNorm = 0.5594, lr_0 = 1.5664e-04
Loss = 3.2929e-02, PNorm = 70.6930, GNorm = 0.5381, lr_0 = 1.5654e-04
Loss = 4.4086e-02, PNorm = 70.6944, GNorm = 0.4201, lr_0 = 1.5643e-04
Loss = 4.2896e-02, PNorm = 70.6969, GNorm = 0.7103, lr_0 = 1.5632e-04
Loss = 3.6202e-02, PNorm = 70.7000, GNorm = 0.5203, lr_0 = 1.5621e-04
Loss = 3.5346e-02, PNorm = 70.7015, GNorm = 0.4846, lr_0 = 1.5611e-04
Loss = 4.2835e-02, PNorm = 70.7038, GNorm = 0.5966, lr_0 = 1.5600e-04
Loss = 4.2901e-02, PNorm = 70.7069, GNorm = 0.5597, lr_0 = 1.5589e-04
Loss = 3.6447e-02, PNorm = 70.7095, GNorm = 0.4175, lr_0 = 1.5579e-04
Loss = 3.9324e-02, PNorm = 70.7120, GNorm = 0.7202, lr_0 = 1.5568e-04
Loss = 3.8448e-02, PNorm = 70.7143, GNorm = 0.4986, lr_0 = 1.5557e-04
Loss = 3.4955e-02, PNorm = 70.7144, GNorm = 0.5438, lr_0 = 1.5547e-04
Loss = 4.0288e-02, PNorm = 70.7155, GNorm = 0.7212, lr_0 = 1.5536e-04
Loss = 3.9590e-02, PNorm = 70.7181, GNorm = 0.5380, lr_0 = 1.5525e-04
Loss = 3.6260e-02, PNorm = 70.7209, GNorm = 0.4740, lr_0 = 1.5515e-04
Loss = 3.6722e-02, PNorm = 70.7236, GNorm = 0.6068, lr_0 = 1.5504e-04
Loss = 3.9508e-02, PNorm = 70.7258, GNorm = 0.5331, lr_0 = 1.5493e-04
Loss = 4.4002e-02, PNorm = 70.7285, GNorm = 0.6127, lr_0 = 1.5483e-04
Loss = 3.9273e-02, PNorm = 70.7311, GNorm = 0.5385, lr_0 = 1.5472e-04
Loss = 3.7522e-02, PNorm = 70.7330, GNorm = 0.5556, lr_0 = 1.5462e-04
Loss = 3.3942e-02, PNorm = 70.7336, GNorm = 0.5276, lr_0 = 1.5451e-04
Loss = 4.1365e-02, PNorm = 70.7358, GNorm = 0.4284, lr_0 = 1.5440e-04
Loss = 4.1290e-02, PNorm = 70.7381, GNorm = 0.6107, lr_0 = 1.5430e-04
Loss = 3.7809e-02, PNorm = 70.7414, GNorm = 0.4374, lr_0 = 1.5419e-04
Loss = 4.4078e-02, PNorm = 70.7435, GNorm = 0.4794, lr_0 = 1.5409e-04
Loss = 4.2786e-02, PNorm = 70.7457, GNorm = 0.5216, lr_0 = 1.5398e-04
Loss = 4.0783e-02, PNorm = 70.7486, GNorm = 0.4182, lr_0 = 1.5388e-04
Loss = 3.7053e-02, PNorm = 70.7506, GNorm = 0.5107, lr_0 = 1.5377e-04
Loss = 4.3193e-02, PNorm = 70.7522, GNorm = 0.5261, lr_0 = 1.5367e-04
Loss = 3.1766e-02, PNorm = 70.7544, GNorm = 0.4701, lr_0 = 1.5356e-04
Loss = 3.8100e-02, PNorm = 70.7576, GNorm = 0.5991, lr_0 = 1.5346e-04
Loss = 3.8213e-02, PNorm = 70.7603, GNorm = 0.4667, lr_0 = 1.5335e-04
Loss = 3.6666e-02, PNorm = 70.7623, GNorm = 0.7034, lr_0 = 1.5325e-04
Loss = 3.8122e-02, PNorm = 70.7648, GNorm = 0.5939, lr_0 = 1.5314e-04
Loss = 4.0805e-02, PNorm = 70.7659, GNorm = 0.5504, lr_0 = 1.5304e-04
Loss = 3.6579e-02, PNorm = 70.7665, GNorm = 0.3791, lr_0 = 1.5293e-04
Loss = 4.2011e-02, PNorm = 70.7696, GNorm = 0.4084, lr_0 = 1.5283e-04
Loss = 3.8952e-02, PNorm = 70.7722, GNorm = 0.5012, lr_0 = 1.5272e-04
Loss = 3.6591e-02, PNorm = 70.7730, GNorm = 0.5026, lr_0 = 1.5262e-04
Loss = 4.4320e-02, PNorm = 70.7757, GNorm = 0.6209, lr_0 = 1.5251e-04
Loss = 3.6516e-02, PNorm = 70.7776, GNorm = 0.4147, lr_0 = 1.5241e-04
Loss = 3.2540e-02, PNorm = 70.7799, GNorm = 0.5584, lr_0 = 1.5230e-04
Loss = 3.4564e-02, PNorm = 70.7816, GNorm = 0.4724, lr_0 = 1.5220e-04
Loss = 3.6157e-02, PNorm = 70.7832, GNorm = 0.5496, lr_0 = 1.5209e-04
Loss = 3.9743e-02, PNorm = 70.7840, GNorm = 0.6197, lr_0 = 1.5199e-04
Loss = 4.1630e-02, PNorm = 70.7838, GNorm = 0.5723, lr_0 = 1.5189e-04
Loss = 3.7353e-02, PNorm = 70.7842, GNorm = 0.4712, lr_0 = 1.5178e-04
Loss = 3.9075e-02, PNorm = 70.7849, GNorm = 0.5614, lr_0 = 1.5168e-04
Loss = 3.5591e-02, PNorm = 70.7866, GNorm = 0.4448, lr_0 = 1.5157e-04
Loss = 4.1831e-02, PNorm = 70.7886, GNorm = 0.4657, lr_0 = 1.5147e-04
Loss = 3.9181e-02, PNorm = 70.7910, GNorm = 0.4984, lr_0 = 1.5137e-04
Loss = 4.0477e-02, PNorm = 70.7944, GNorm = 0.3630, lr_0 = 1.5126e-04
Loss = 3.2160e-02, PNorm = 70.7981, GNorm = 0.3182, lr_0 = 1.5116e-04
Loss = 3.1858e-02, PNorm = 70.8005, GNorm = 0.3770, lr_0 = 1.5106e-04
Loss = 4.0125e-02, PNorm = 70.8030, GNorm = 0.4193, lr_0 = 1.5095e-04
Loss = 3.9001e-02, PNorm = 70.8067, GNorm = 0.6508, lr_0 = 1.5085e-04
Validation mae = 0.393103
Epoch 25
Loss = 3.8934e-02, PNorm = 70.8093, GNorm = 0.5609, lr_0 = 1.5075e-04
Loss = 2.8561e-02, PNorm = 70.8126, GNorm = 0.4123, lr_0 = 1.5064e-04
Loss = 3.7296e-02, PNorm = 70.8153, GNorm = 0.3550, lr_0 = 1.5054e-04
Loss = 3.5932e-02, PNorm = 70.8170, GNorm = 0.4536, lr_0 = 1.5044e-04
Loss = 3.0264e-02, PNorm = 70.8197, GNorm = 0.4454, lr_0 = 1.5033e-04
Loss = 3.7113e-02, PNorm = 70.8209, GNorm = 0.4892, lr_0 = 1.5023e-04
Loss = 3.3250e-02, PNorm = 70.8226, GNorm = 0.5103, lr_0 = 1.5013e-04
Loss = 3.0435e-02, PNorm = 70.8240, GNorm = 0.6019, lr_0 = 1.5002e-04
Loss = 3.2675e-02, PNorm = 70.8253, GNorm = 0.3606, lr_0 = 1.4992e-04
Loss = 3.5263e-02, PNorm = 70.8265, GNorm = 0.4367, lr_0 = 1.4982e-04
Loss = 3.4403e-02, PNorm = 70.8279, GNorm = 0.5497, lr_0 = 1.4972e-04
Loss = 2.9685e-02, PNorm = 70.8299, GNorm = 0.3722, lr_0 = 1.4961e-04
Loss = 3.5487e-02, PNorm = 70.8320, GNorm = 0.3867, lr_0 = 1.4951e-04
Loss = 2.9577e-02, PNorm = 70.8340, GNorm = 0.3841, lr_0 = 1.4941e-04
Loss = 3.3064e-02, PNorm = 70.8363, GNorm = 0.3487, lr_0 = 1.4931e-04
Loss = 3.9830e-02, PNorm = 70.8395, GNorm = 0.8682, lr_0 = 1.4920e-04
Loss = 3.1322e-02, PNorm = 70.8425, GNorm = 0.4903, lr_0 = 1.4910e-04
Loss = 3.2961e-02, PNorm = 70.8446, GNorm = 0.5914, lr_0 = 1.4900e-04
Loss = 3.2036e-02, PNorm = 70.8461, GNorm = 0.4628, lr_0 = 1.4890e-04
Loss = 4.0606e-02, PNorm = 70.8484, GNorm = 0.5193, lr_0 = 1.4880e-04
Loss = 3.3621e-02, PNorm = 70.8508, GNorm = 0.3986, lr_0 = 1.4869e-04
Loss = 3.5698e-02, PNorm = 70.8526, GNorm = 0.4120, lr_0 = 1.4859e-04
Loss = 3.7109e-02, PNorm = 70.8542, GNorm = 0.4154, lr_0 = 1.4849e-04
Loss = 3.1286e-02, PNorm = 70.8571, GNorm = 0.4325, lr_0 = 1.4839e-04
Loss = 3.2033e-02, PNorm = 70.8595, GNorm = 0.6912, lr_0 = 1.4829e-04
Loss = 4.2056e-02, PNorm = 70.8622, GNorm = 0.5261, lr_0 = 1.4818e-04
Loss = 3.1436e-02, PNorm = 70.8650, GNorm = 0.9150, lr_0 = 1.4808e-04
Loss = 3.6644e-02, PNorm = 70.8674, GNorm = 0.8185, lr_0 = 1.4798e-04
Loss = 3.5877e-02, PNorm = 70.8708, GNorm = 0.4912, lr_0 = 1.4788e-04
Loss = 2.7526e-02, PNorm = 70.8738, GNorm = 0.3834, lr_0 = 1.4778e-04
Loss = 3.6362e-02, PNorm = 70.8766, GNorm = 0.8469, lr_0 = 1.4768e-04
Loss = 3.6729e-02, PNorm = 70.8790, GNorm = 0.5007, lr_0 = 1.4758e-04
Loss = 3.5965e-02, PNorm = 70.8817, GNorm = 0.8365, lr_0 = 1.4748e-04
Loss = 3.4462e-02, PNorm = 70.8845, GNorm = 0.3743, lr_0 = 1.4737e-04
Loss = 3.1618e-02, PNorm = 70.8857, GNorm = 0.4610, lr_0 = 1.4727e-04
Loss = 4.1316e-02, PNorm = 70.8875, GNorm = 0.4039, lr_0 = 1.4717e-04
Loss = 3.9716e-02, PNorm = 70.8913, GNorm = 0.6775, lr_0 = 1.4707e-04
Loss = 3.8543e-02, PNorm = 70.8946, GNorm = 0.4404, lr_0 = 1.4697e-04
Loss = 3.6250e-02, PNorm = 70.8967, GNorm = 0.5314, lr_0 = 1.4687e-04
Loss = 3.3014e-02, PNorm = 70.8974, GNorm = 0.5124, lr_0 = 1.4677e-04
Loss = 3.8171e-02, PNorm = 70.8999, GNorm = 0.4439, lr_0 = 1.4667e-04
Loss = 3.4374e-02, PNorm = 70.9024, GNorm = 0.3817, lr_0 = 1.4657e-04
Loss = 3.6602e-02, PNorm = 70.9051, GNorm = 0.7637, lr_0 = 1.4647e-04
Loss = 3.5633e-02, PNorm = 70.9072, GNorm = 0.4128, lr_0 = 1.4637e-04
Loss = 3.6594e-02, PNorm = 70.9106, GNorm = 0.5805, lr_0 = 1.4627e-04
Loss = 3.9390e-02, PNorm = 70.9128, GNorm = 0.7855, lr_0 = 1.4617e-04
Loss = 3.7054e-02, PNorm = 70.9143, GNorm = 0.3550, lr_0 = 1.4607e-04
Loss = 4.0201e-02, PNorm = 70.9165, GNorm = 0.4943, lr_0 = 1.4597e-04
Loss = 3.7950e-02, PNorm = 70.9188, GNorm = 0.3734, lr_0 = 1.4587e-04
Loss = 3.8889e-02, PNorm = 70.9211, GNorm = 0.5859, lr_0 = 1.4577e-04
Loss = 3.4660e-02, PNorm = 70.9230, GNorm = 0.5500, lr_0 = 1.4567e-04
Loss = 4.0727e-02, PNorm = 70.9264, GNorm = 0.5057, lr_0 = 1.4557e-04
Loss = 3.7320e-02, PNorm = 70.9293, GNorm = 0.8133, lr_0 = 1.4547e-04
Loss = 3.7569e-02, PNorm = 70.9328, GNorm = 0.4821, lr_0 = 1.4537e-04
Loss = 3.5341e-02, PNorm = 70.9358, GNorm = 0.5043, lr_0 = 1.4527e-04
Loss = 3.4390e-02, PNorm = 70.9384, GNorm = 0.4379, lr_0 = 1.4517e-04
Loss = 3.4100e-02, PNorm = 70.9409, GNorm = 0.4549, lr_0 = 1.4507e-04
Loss = 3.2881e-02, PNorm = 70.9428, GNorm = 0.4033, lr_0 = 1.4497e-04
Loss = 3.5795e-02, PNorm = 70.9448, GNorm = 0.5760, lr_0 = 1.4487e-04
Loss = 3.4995e-02, PNorm = 70.9468, GNorm = 0.7227, lr_0 = 1.4477e-04
Loss = 3.8008e-02, PNorm = 70.9489, GNorm = 0.4702, lr_0 = 1.4467e-04
Loss = 3.2209e-02, PNorm = 70.9504, GNorm = 0.5273, lr_0 = 1.4457e-04
Loss = 3.1114e-02, PNorm = 70.9535, GNorm = 0.4387, lr_0 = 1.4447e-04
Loss = 3.6232e-02, PNorm = 70.9560, GNorm = 0.4740, lr_0 = 1.4438e-04
Loss = 3.6858e-02, PNorm = 70.9574, GNorm = 0.5108, lr_0 = 1.4428e-04
Loss = 4.2739e-02, PNorm = 70.9595, GNorm = 0.3967, lr_0 = 1.4418e-04
Loss = 3.2898e-02, PNorm = 70.9630, GNorm = 0.4222, lr_0 = 1.4408e-04
Loss = 3.1519e-02, PNorm = 70.9647, GNorm = 0.4351, lr_0 = 1.4398e-04
Loss = 3.8426e-02, PNorm = 70.9659, GNorm = 0.4980, lr_0 = 1.4388e-04
Loss = 3.7690e-02, PNorm = 70.9674, GNorm = 0.4556, lr_0 = 1.4378e-04
Loss = 3.8540e-02, PNorm = 70.9697, GNorm = 0.5838, lr_0 = 1.4368e-04
Loss = 3.1590e-02, PNorm = 70.9707, GNorm = 0.3263, lr_0 = 1.4359e-04
Loss = 3.1256e-02, PNorm = 70.9729, GNorm = 0.4573, lr_0 = 1.4349e-04
Loss = 3.2558e-02, PNorm = 70.9754, GNorm = 0.4858, lr_0 = 1.4339e-04
Loss = 3.4360e-02, PNorm = 70.9775, GNorm = 0.4057, lr_0 = 1.4329e-04
Loss = 3.1963e-02, PNorm = 70.9801, GNorm = 0.4648, lr_0 = 1.4319e-04
Loss = 3.9764e-02, PNorm = 70.9819, GNorm = 0.8495, lr_0 = 1.4310e-04
Loss = 3.3315e-02, PNorm = 70.9828, GNorm = 0.6298, lr_0 = 1.4300e-04
Loss = 3.4259e-02, PNorm = 70.9846, GNorm = 0.4148, lr_0 = 1.4290e-04
Loss = 3.8240e-02, PNorm = 70.9853, GNorm = 0.4549, lr_0 = 1.4280e-04
Loss = 4.2706e-02, PNorm = 70.9862, GNorm = 0.6309, lr_0 = 1.4270e-04
Loss = 3.5138e-02, PNorm = 70.9884, GNorm = 0.4136, lr_0 = 1.4261e-04
Loss = 4.5924e-02, PNorm = 70.9902, GNorm = 0.4900, lr_0 = 1.4251e-04
Loss = 3.4729e-02, PNorm = 70.9916, GNorm = 0.4617, lr_0 = 1.4241e-04
Loss = 3.3694e-02, PNorm = 70.9935, GNorm = 0.5081, lr_0 = 1.4231e-04
Loss = 3.4796e-02, PNorm = 70.9959, GNorm = 0.4186, lr_0 = 1.4222e-04
Loss = 3.6002e-02, PNorm = 70.9971, GNorm = 0.4818, lr_0 = 1.4212e-04
Loss = 3.7770e-02, PNorm = 70.9981, GNorm = 0.4929, lr_0 = 1.4202e-04
Loss = 3.9554e-02, PNorm = 70.9993, GNorm = 0.5621, lr_0 = 1.4192e-04
Loss = 3.3686e-02, PNorm = 71.0009, GNorm = 0.5408, lr_0 = 1.4183e-04
Loss = 4.2390e-02, PNorm = 71.0036, GNorm = 0.3683, lr_0 = 1.4173e-04
Loss = 3.5094e-02, PNorm = 71.0050, GNorm = 0.3757, lr_0 = 1.4163e-04
Loss = 3.7070e-02, PNorm = 71.0065, GNorm = 0.4781, lr_0 = 1.4153e-04
Loss = 3.4674e-02, PNorm = 71.0085, GNorm = 0.6804, lr_0 = 1.4144e-04
Loss = 4.2206e-02, PNorm = 71.0096, GNorm = 0.4524, lr_0 = 1.4134e-04
Loss = 4.0422e-02, PNorm = 71.0103, GNorm = 0.5834, lr_0 = 1.4124e-04
Loss = 4.0243e-02, PNorm = 71.0120, GNorm = 0.4397, lr_0 = 1.4115e-04
Loss = 3.8984e-02, PNorm = 71.0136, GNorm = 0.6862, lr_0 = 1.4105e-04
Loss = 3.7734e-02, PNorm = 71.0148, GNorm = 0.5050, lr_0 = 1.4095e-04
Loss = 4.3301e-02, PNorm = 71.0166, GNorm = 0.3857, lr_0 = 1.4086e-04
Loss = 4.2700e-02, PNorm = 71.0184, GNorm = 0.6926, lr_0 = 1.4076e-04
Loss = 4.6805e-02, PNorm = 71.0202, GNorm = 0.3938, lr_0 = 1.4066e-04
Loss = 3.6613e-02, PNorm = 71.0219, GNorm = 0.5536, lr_0 = 1.4057e-04
Loss = 3.6444e-02, PNorm = 71.0244, GNorm = 0.7084, lr_0 = 1.4047e-04
Loss = 3.3658e-02, PNorm = 71.0259, GNorm = 0.4130, lr_0 = 1.4038e-04
Loss = 3.6231e-02, PNorm = 71.0272, GNorm = 0.3974, lr_0 = 1.4028e-04
Loss = 3.8579e-02, PNorm = 71.0284, GNorm = 0.3490, lr_0 = 1.4018e-04
Loss = 4.3754e-02, PNorm = 71.0311, GNorm = 0.4549, lr_0 = 1.4009e-04
Loss = 3.5350e-02, PNorm = 71.0334, GNorm = 0.4216, lr_0 = 1.3999e-04
Loss = 4.0206e-02, PNorm = 71.0344, GNorm = 0.4027, lr_0 = 1.3990e-04
Loss = 3.4238e-02, PNorm = 71.0372, GNorm = 0.4137, lr_0 = 1.3980e-04
Loss = 3.5904e-02, PNorm = 71.0394, GNorm = 0.5216, lr_0 = 1.3970e-04
Loss = 3.9126e-02, PNorm = 71.0407, GNorm = 0.7101, lr_0 = 1.3961e-04
Loss = 3.6830e-02, PNorm = 71.0421, GNorm = 0.6314, lr_0 = 1.3951e-04
Loss = 3.4930e-02, PNorm = 71.0439, GNorm = 0.4117, lr_0 = 1.3942e-04
Loss = 3.7765e-02, PNorm = 71.0454, GNorm = 0.4523, lr_0 = 1.3932e-04
Loss = 4.2182e-02, PNorm = 71.0480, GNorm = 0.5680, lr_0 = 1.3923e-04
Loss = 3.4140e-02, PNorm = 71.0508, GNorm = 0.4583, lr_0 = 1.3913e-04
Loss = 3.4939e-02, PNorm = 71.0527, GNorm = 0.3521, lr_0 = 1.3904e-04
Loss = 3.5352e-02, PNorm = 71.0541, GNorm = 0.4547, lr_0 = 1.3894e-04
Validation mae = 0.387492
Epoch 26
Loss = 2.8852e-02, PNorm = 71.0553, GNorm = 0.2959, lr_0 = 1.3884e-04
Loss = 3.2040e-02, PNorm = 71.0578, GNorm = 0.4467, lr_0 = 1.3875e-04
Loss = 3.4081e-02, PNorm = 71.0597, GNorm = 0.6187, lr_0 = 1.3865e-04
Loss = 2.9269e-02, PNorm = 71.0617, GNorm = 0.3534, lr_0 = 1.3856e-04
Loss = 3.2222e-02, PNorm = 71.0642, GNorm = 0.4879, lr_0 = 1.3846e-04
Loss = 3.2264e-02, PNorm = 71.0660, GNorm = 0.4390, lr_0 = 1.3837e-04
Loss = 2.9501e-02, PNorm = 71.0675, GNorm = 0.4655, lr_0 = 1.3828e-04
Loss = 3.1988e-02, PNorm = 71.0695, GNorm = 0.4351, lr_0 = 1.3818e-04
Loss = 2.8933e-02, PNorm = 71.0717, GNorm = 0.5447, lr_0 = 1.3809e-04
Loss = 3.2715e-02, PNorm = 71.0738, GNorm = 0.6878, lr_0 = 1.3799e-04
Loss = 3.0970e-02, PNorm = 71.0755, GNorm = 0.4584, lr_0 = 1.3790e-04
Loss = 2.9204e-02, PNorm = 71.0770, GNorm = 0.3381, lr_0 = 1.3780e-04
Loss = 2.9615e-02, PNorm = 71.0789, GNorm = 0.3985, lr_0 = 1.3771e-04
Loss = 3.9917e-02, PNorm = 71.0812, GNorm = 0.5696, lr_0 = 1.3761e-04
Loss = 3.9376e-02, PNorm = 71.0836, GNorm = 0.4831, lr_0 = 1.3752e-04
Loss = 3.2096e-02, PNorm = 71.0864, GNorm = 0.4292, lr_0 = 1.3742e-04
Loss = 3.5857e-02, PNorm = 71.0885, GNorm = 0.6084, lr_0 = 1.3733e-04
Loss = 3.0748e-02, PNorm = 71.0895, GNorm = 0.5276, lr_0 = 1.3724e-04
Loss = 3.3522e-02, PNorm = 71.0910, GNorm = 0.3912, lr_0 = 1.3714e-04
Loss = 2.9624e-02, PNorm = 71.0934, GNorm = 0.4887, lr_0 = 1.3705e-04
Loss = 3.4553e-02, PNorm = 71.0960, GNorm = 0.3919, lr_0 = 1.3695e-04
Loss = 3.0867e-02, PNorm = 71.0980, GNorm = 0.4227, lr_0 = 1.3686e-04
Loss = 3.2583e-02, PNorm = 71.1002, GNorm = 0.6627, lr_0 = 1.3677e-04
Loss = 3.8116e-02, PNorm = 71.1018, GNorm = 0.4684, lr_0 = 1.3667e-04
Loss = 3.0977e-02, PNorm = 71.1033, GNorm = 0.3144, lr_0 = 1.3658e-04
Loss = 3.6278e-02, PNorm = 71.1039, GNorm = 0.5503, lr_0 = 1.3649e-04
Loss = 3.4372e-02, PNorm = 71.1051, GNorm = 0.7076, lr_0 = 1.3639e-04
Loss = 3.0067e-02, PNorm = 71.1063, GNorm = 0.4766, lr_0 = 1.3630e-04
Loss = 2.9996e-02, PNorm = 71.1081, GNorm = 0.4845, lr_0 = 1.3621e-04
Loss = 3.4549e-02, PNorm = 71.1097, GNorm = 0.7362, lr_0 = 1.3611e-04
Loss = 3.4682e-02, PNorm = 71.1109, GNorm = 0.3816, lr_0 = 1.3602e-04
Loss = 3.6273e-02, PNorm = 71.1130, GNorm = 0.4863, lr_0 = 1.3593e-04
Loss = 3.8271e-02, PNorm = 71.1161, GNorm = 0.5576, lr_0 = 1.3583e-04
Loss = 3.6603e-02, PNorm = 71.1190, GNorm = 0.5456, lr_0 = 1.3574e-04
Loss = 3.7096e-02, PNorm = 71.1210, GNorm = 0.5612, lr_0 = 1.3565e-04
Loss = 3.9297e-02, PNorm = 71.1235, GNorm = 0.4529, lr_0 = 1.3555e-04
Loss = 3.7430e-02, PNorm = 71.1256, GNorm = 0.7732, lr_0 = 1.3546e-04
Loss = 3.4050e-02, PNorm = 71.1280, GNorm = 0.6528, lr_0 = 1.3537e-04
Loss = 3.3856e-02, PNorm = 71.1296, GNorm = 0.4627, lr_0 = 1.3528e-04
Loss = 3.4490e-02, PNorm = 71.1301, GNorm = 0.5855, lr_0 = 1.3518e-04
Loss = 3.2023e-02, PNorm = 71.1312, GNorm = 0.6667, lr_0 = 1.3509e-04
Loss = 3.6069e-02, PNorm = 71.1336, GNorm = 0.6497, lr_0 = 1.3500e-04
Loss = 3.0094e-02, PNorm = 71.1352, GNorm = 0.4287, lr_0 = 1.3491e-04
Loss = 2.7948e-02, PNorm = 71.1370, GNorm = 0.3848, lr_0 = 1.3481e-04
Loss = 3.6091e-02, PNorm = 71.1396, GNorm = 0.6053, lr_0 = 1.3472e-04
Loss = 3.4970e-02, PNorm = 71.1422, GNorm = 0.6134, lr_0 = 1.3463e-04
Loss = 3.7322e-02, PNorm = 71.1443, GNorm = 0.6553, lr_0 = 1.3454e-04
Loss = 3.1800e-02, PNorm = 71.1457, GNorm = 0.5074, lr_0 = 1.3444e-04
Loss = 3.3558e-02, PNorm = 71.1467, GNorm = 0.4696, lr_0 = 1.3435e-04
Loss = 3.1965e-02, PNorm = 71.1480, GNorm = 0.4676, lr_0 = 1.3426e-04
Loss = 3.0821e-02, PNorm = 71.1488, GNorm = 0.5578, lr_0 = 1.3417e-04
Loss = 3.8040e-02, PNorm = 71.1498, GNorm = 0.4981, lr_0 = 1.3408e-04
Loss = 3.5672e-02, PNorm = 71.1516, GNorm = 0.4573, lr_0 = 1.3398e-04
Loss = 3.3411e-02, PNorm = 71.1536, GNorm = 0.4928, lr_0 = 1.3389e-04
Loss = 3.4388e-02, PNorm = 71.1564, GNorm = 0.4676, lr_0 = 1.3380e-04
Loss = 3.3489e-02, PNorm = 71.1585, GNorm = 0.4595, lr_0 = 1.3371e-04
Loss = 3.9250e-02, PNorm = 71.1609, GNorm = 0.5887, lr_0 = 1.3362e-04
Loss = 4.4857e-02, PNorm = 71.1634, GNorm = 0.4361, lr_0 = 1.3353e-04
Loss = 3.6946e-02, PNorm = 71.1659, GNorm = 0.4550, lr_0 = 1.3343e-04
Loss = 3.1176e-02, PNorm = 71.1692, GNorm = 0.4231, lr_0 = 1.3334e-04
Loss = 4.4342e-02, PNorm = 71.1726, GNorm = 0.5143, lr_0 = 1.3325e-04
Loss = 3.6716e-02, PNorm = 71.1754, GNorm = 0.4689, lr_0 = 1.3316e-04
Loss = 3.7862e-02, PNorm = 71.1774, GNorm = 0.4159, lr_0 = 1.3307e-04
Loss = 3.8980e-02, PNorm = 71.1784, GNorm = 0.7479, lr_0 = 1.3298e-04
Loss = 3.3354e-02, PNorm = 71.1798, GNorm = 0.4557, lr_0 = 1.3289e-04
Loss = 3.2757e-02, PNorm = 71.1807, GNorm = 0.4005, lr_0 = 1.3280e-04
Loss = 3.3146e-02, PNorm = 71.1815, GNorm = 0.4754, lr_0 = 1.3270e-04
Loss = 3.4066e-02, PNorm = 71.1831, GNorm = 0.3285, lr_0 = 1.3261e-04
Loss = 3.6588e-02, PNorm = 71.1842, GNorm = 0.5253, lr_0 = 1.3252e-04
Loss = 3.4508e-02, PNorm = 71.1856, GNorm = 0.3679, lr_0 = 1.3243e-04
Loss = 3.6136e-02, PNorm = 71.1861, GNorm = 0.4107, lr_0 = 1.3234e-04
Loss = 3.1485e-02, PNorm = 71.1878, GNorm = 0.4306, lr_0 = 1.3225e-04
Loss = 3.9080e-02, PNorm = 71.1895, GNorm = 0.5078, lr_0 = 1.3216e-04
Loss = 3.8571e-02, PNorm = 71.1911, GNorm = 0.4643, lr_0 = 1.3207e-04
Loss = 3.1207e-02, PNorm = 71.1922, GNorm = 0.4057, lr_0 = 1.3198e-04
Loss = 3.4363e-02, PNorm = 71.1933, GNorm = 0.5349, lr_0 = 1.3189e-04
Loss = 3.5488e-02, PNorm = 71.1946, GNorm = 0.4067, lr_0 = 1.3180e-04
Loss = 3.4803e-02, PNorm = 71.1963, GNorm = 0.4535, lr_0 = 1.3171e-04
Loss = 3.5689e-02, PNorm = 71.1994, GNorm = 0.5813, lr_0 = 1.3162e-04
Loss = 3.2782e-02, PNorm = 71.2017, GNorm = 0.6602, lr_0 = 1.3153e-04
Loss = 3.1086e-02, PNorm = 71.2037, GNorm = 0.5164, lr_0 = 1.3144e-04
Loss = 3.8560e-02, PNorm = 71.2052, GNorm = 0.5773, lr_0 = 1.3135e-04
Loss = 3.4351e-02, PNorm = 71.2064, GNorm = 0.5915, lr_0 = 1.3126e-04
Loss = 3.7927e-02, PNorm = 71.2078, GNorm = 0.4094, lr_0 = 1.3117e-04
Loss = 3.7960e-02, PNorm = 71.2092, GNorm = 0.5304, lr_0 = 1.3108e-04
Loss = 3.1210e-02, PNorm = 71.2113, GNorm = 0.4607, lr_0 = 1.3099e-04
Loss = 3.2891e-02, PNorm = 71.2132, GNorm = 0.4473, lr_0 = 1.3090e-04
Loss = 3.0125e-02, PNorm = 71.2153, GNorm = 0.4857, lr_0 = 1.3081e-04
Loss = 4.1899e-02, PNorm = 71.2174, GNorm = 0.5556, lr_0 = 1.3072e-04
Loss = 3.7066e-02, PNorm = 71.2196, GNorm = 0.4645, lr_0 = 1.3063e-04
Loss = 3.7096e-02, PNorm = 71.2223, GNorm = 0.5037, lr_0 = 1.3054e-04
Loss = 3.4088e-02, PNorm = 71.2247, GNorm = 0.6385, lr_0 = 1.3045e-04
Loss = 3.1644e-02, PNorm = 71.2264, GNorm = 0.5056, lr_0 = 1.3036e-04
Loss = 3.2708e-02, PNorm = 71.2280, GNorm = 0.6060, lr_0 = 1.3027e-04
Loss = 3.1109e-02, PNorm = 71.2299, GNorm = 0.3888, lr_0 = 1.3018e-04
Loss = 3.0328e-02, PNorm = 71.2313, GNorm = 0.7546, lr_0 = 1.3009e-04
Loss = 3.4730e-02, PNorm = 71.2329, GNorm = 0.4098, lr_0 = 1.3000e-04
Loss = 3.5471e-02, PNorm = 71.2342, GNorm = 0.4238, lr_0 = 1.2992e-04
Loss = 3.7212e-02, PNorm = 71.2358, GNorm = 0.4042, lr_0 = 1.2983e-04
Loss = 2.9683e-02, PNorm = 71.2386, GNorm = 0.6653, lr_0 = 1.2974e-04
Loss = 3.1638e-02, PNorm = 71.2407, GNorm = 0.4746, lr_0 = 1.2965e-04
Loss = 2.9493e-02, PNorm = 71.2425, GNorm = 0.4642, lr_0 = 1.2956e-04
Loss = 3.4235e-02, PNorm = 71.2440, GNorm = 0.4920, lr_0 = 1.2947e-04
Loss = 3.4587e-02, PNorm = 71.2456, GNorm = 0.4328, lr_0 = 1.2938e-04
Loss = 3.5081e-02, PNorm = 71.2470, GNorm = 0.5224, lr_0 = 1.2929e-04
Loss = 3.5769e-02, PNorm = 71.2485, GNorm = 0.3677, lr_0 = 1.2921e-04
Loss = 3.5457e-02, PNorm = 71.2513, GNorm = 0.7628, lr_0 = 1.2912e-04
Loss = 3.2454e-02, PNorm = 71.2537, GNorm = 0.5733, lr_0 = 1.2903e-04
Loss = 3.8855e-02, PNorm = 71.2557, GNorm = 0.7077, lr_0 = 1.2894e-04
Loss = 3.4677e-02, PNorm = 71.2567, GNorm = 0.5939, lr_0 = 1.2885e-04
Loss = 3.4711e-02, PNorm = 71.2578, GNorm = 0.5262, lr_0 = 1.2876e-04
Loss = 3.0608e-02, PNorm = 71.2583, GNorm = 0.4685, lr_0 = 1.2867e-04
Loss = 3.5786e-02, PNorm = 71.2599, GNorm = 0.7409, lr_0 = 1.2859e-04
Loss = 3.3607e-02, PNorm = 71.2631, GNorm = 0.4949, lr_0 = 1.2850e-04
Loss = 3.3807e-02, PNorm = 71.2651, GNorm = 0.5311, lr_0 = 1.2841e-04
Loss = 3.7544e-02, PNorm = 71.2670, GNorm = 0.6055, lr_0 = 1.2832e-04
Loss = 4.2215e-02, PNorm = 71.2684, GNorm = 0.5692, lr_0 = 1.2823e-04
Loss = 3.4762e-02, PNorm = 71.2702, GNorm = 0.4610, lr_0 = 1.2815e-04
Loss = 3.3714e-02, PNorm = 71.2716, GNorm = 0.5140, lr_0 = 1.2806e-04
Loss = 3.5488e-02, PNorm = 71.2728, GNorm = 0.6549, lr_0 = 1.2797e-04
Validation mae = 0.395031
Epoch 27
Loss = 3.5629e-02, PNorm = 71.2749, GNorm = 0.5224, lr_0 = 1.2788e-04
Loss = 2.8386e-02, PNorm = 71.2766, GNorm = 0.4518, lr_0 = 1.2780e-04
Loss = 3.2724e-02, PNorm = 71.2781, GNorm = 0.4400, lr_0 = 1.2771e-04
Loss = 3.5360e-02, PNorm = 71.2801, GNorm = 0.5345, lr_0 = 1.2762e-04
Loss = 2.6792e-02, PNorm = 71.2821, GNorm = 0.4548, lr_0 = 1.2753e-04
Loss = 2.9908e-02, PNorm = 71.2841, GNorm = 0.4727, lr_0 = 1.2745e-04
Loss = 3.1887e-02, PNorm = 71.2859, GNorm = 0.4425, lr_0 = 1.2736e-04
Loss = 2.9485e-02, PNorm = 71.2868, GNorm = 0.4379, lr_0 = 1.2727e-04
Loss = 3.0179e-02, PNorm = 71.2878, GNorm = 0.4721, lr_0 = 1.2718e-04
Loss = 3.2050e-02, PNorm = 71.2893, GNorm = 0.4294, lr_0 = 1.2710e-04
Loss = 2.7063e-02, PNorm = 71.2916, GNorm = 0.5290, lr_0 = 1.2701e-04
Loss = 3.6332e-02, PNorm = 71.2930, GNorm = 0.4991, lr_0 = 1.2692e-04
Loss = 3.3170e-02, PNorm = 71.2960, GNorm = 0.4693, lr_0 = 1.2684e-04
Loss = 3.0962e-02, PNorm = 71.2982, GNorm = 0.3420, lr_0 = 1.2675e-04
Loss = 2.9155e-02, PNorm = 71.3006, GNorm = 0.4935, lr_0 = 1.2666e-04
Loss = 3.1470e-02, PNorm = 71.3024, GNorm = 0.5215, lr_0 = 1.2658e-04
Loss = 3.3231e-02, PNorm = 71.3042, GNorm = 0.4876, lr_0 = 1.2649e-04
Loss = 2.8775e-02, PNorm = 71.3069, GNorm = 0.4709, lr_0 = 1.2640e-04
Loss = 2.5938e-02, PNorm = 71.3090, GNorm = 0.5137, lr_0 = 1.2632e-04
Loss = 2.6614e-02, PNorm = 71.3110, GNorm = 0.3723, lr_0 = 1.2623e-04
Loss = 3.0748e-02, PNorm = 71.3129, GNorm = 0.4401, lr_0 = 1.2614e-04
Loss = 2.6009e-02, PNorm = 71.3142, GNorm = 0.5585, lr_0 = 1.2606e-04
Loss = 3.2560e-02, PNorm = 71.3151, GNorm = 0.5701, lr_0 = 1.2597e-04
Loss = 3.3674e-02, PNorm = 71.3173, GNorm = 0.4483, lr_0 = 1.2588e-04
Loss = 3.0058e-02, PNorm = 71.3195, GNorm = 0.4429, lr_0 = 1.2580e-04
Loss = 3.0643e-02, PNorm = 71.3206, GNorm = 0.6354, lr_0 = 1.2571e-04
Loss = 3.3590e-02, PNorm = 71.3214, GNorm = 0.4562, lr_0 = 1.2563e-04
Loss = 3.2604e-02, PNorm = 71.3225, GNorm = 0.4837, lr_0 = 1.2554e-04
Loss = 2.6477e-02, PNorm = 71.3232, GNorm = 0.3292, lr_0 = 1.2545e-04
Loss = 2.8691e-02, PNorm = 71.3226, GNorm = 0.3664, lr_0 = 1.2537e-04
Loss = 3.7651e-02, PNorm = 71.3240, GNorm = 0.8869, lr_0 = 1.2528e-04
Loss = 3.5257e-02, PNorm = 71.3255, GNorm = 0.5347, lr_0 = 1.2520e-04
Loss = 2.9465e-02, PNorm = 71.3265, GNorm = 0.3485, lr_0 = 1.2511e-04
Loss = 3.1945e-02, PNorm = 71.3272, GNorm = 0.4736, lr_0 = 1.2502e-04
Loss = 3.0106e-02, PNorm = 71.3289, GNorm = 0.4515, lr_0 = 1.2494e-04
Loss = 3.7719e-02, PNorm = 71.3314, GNorm = 0.6708, lr_0 = 1.2485e-04
Loss = 3.1736e-02, PNorm = 71.3340, GNorm = 0.5323, lr_0 = 1.2477e-04
Loss = 3.3277e-02, PNorm = 71.3351, GNorm = 0.5438, lr_0 = 1.2468e-04
Loss = 3.5641e-02, PNorm = 71.3378, GNorm = 0.5548, lr_0 = 1.2460e-04
Loss = 3.1483e-02, PNorm = 71.3403, GNorm = 0.4758, lr_0 = 1.2451e-04
Loss = 3.3696e-02, PNorm = 71.3416, GNorm = 0.3987, lr_0 = 1.2443e-04
Loss = 3.3074e-02, PNorm = 71.3446, GNorm = 0.5190, lr_0 = 1.2434e-04
Loss = 3.3206e-02, PNorm = 71.3470, GNorm = 0.3748, lr_0 = 1.2426e-04
Loss = 3.4313e-02, PNorm = 71.3485, GNorm = 0.5657, lr_0 = 1.2417e-04
Loss = 2.9579e-02, PNorm = 71.3500, GNorm = 0.5041, lr_0 = 1.2409e-04
Loss = 3.2131e-02, PNorm = 71.3520, GNorm = 0.4856, lr_0 = 1.2400e-04
Loss = 3.0526e-02, PNorm = 71.3532, GNorm = 0.3937, lr_0 = 1.2392e-04
Loss = 2.9422e-02, PNorm = 71.3542, GNorm = 0.3638, lr_0 = 1.2383e-04
Loss = 2.9354e-02, PNorm = 71.3560, GNorm = 0.4609, lr_0 = 1.2375e-04
Loss = 2.9791e-02, PNorm = 71.3578, GNorm = 0.5196, lr_0 = 1.2366e-04
Loss = 3.3149e-02, PNorm = 71.3593, GNorm = 0.5115, lr_0 = 1.2358e-04
Loss = 3.4609e-02, PNorm = 71.3606, GNorm = 0.4817, lr_0 = 1.2349e-04
Loss = 3.3931e-02, PNorm = 71.3622, GNorm = 0.5456, lr_0 = 1.2341e-04
Loss = 3.0449e-02, PNorm = 71.3641, GNorm = 0.4119, lr_0 = 1.2332e-04
Loss = 3.4509e-02, PNorm = 71.3664, GNorm = 0.4093, lr_0 = 1.2324e-04
Loss = 3.2721e-02, PNorm = 71.3683, GNorm = 0.6801, lr_0 = 1.2315e-04
Loss = 3.1974e-02, PNorm = 71.3687, GNorm = 0.5188, lr_0 = 1.2307e-04
Loss = 2.9722e-02, PNorm = 71.3702, GNorm = 0.3487, lr_0 = 1.2298e-04
Loss = 3.2108e-02, PNorm = 71.3714, GNorm = 0.5349, lr_0 = 1.2290e-04
Loss = 2.9405e-02, PNorm = 71.3730, GNorm = 0.6128, lr_0 = 1.2282e-04
Loss = 2.9799e-02, PNorm = 71.3759, GNorm = 0.4566, lr_0 = 1.2273e-04
Loss = 3.1120e-02, PNorm = 71.3782, GNorm = 0.4167, lr_0 = 1.2265e-04
Loss = 3.1736e-02, PNorm = 71.3795, GNorm = 0.7296, lr_0 = 1.2256e-04
Loss = 3.7796e-02, PNorm = 71.3806, GNorm = 0.4704, lr_0 = 1.2248e-04
Loss = 3.3961e-02, PNorm = 71.3822, GNorm = 0.6153, lr_0 = 1.2240e-04
Loss = 3.3308e-02, PNorm = 71.3844, GNorm = 0.4018, lr_0 = 1.2231e-04
Loss = 3.4452e-02, PNorm = 71.3873, GNorm = 0.6875, lr_0 = 1.2223e-04
Loss = 3.0911e-02, PNorm = 71.3888, GNorm = 0.4700, lr_0 = 1.2214e-04
Loss = 3.1938e-02, PNorm = 71.3903, GNorm = 0.6948, lr_0 = 1.2206e-04
Loss = 3.1524e-02, PNorm = 71.3926, GNorm = 0.7544, lr_0 = 1.2198e-04
Loss = 3.0821e-02, PNorm = 71.3948, GNorm = 0.4958, lr_0 = 1.2189e-04
Loss = 3.5149e-02, PNorm = 71.3971, GNorm = 0.7020, lr_0 = 1.2181e-04
Loss = 2.8611e-02, PNorm = 71.3984, GNorm = 0.5724, lr_0 = 1.2173e-04
Loss = 3.2649e-02, PNorm = 71.3993, GNorm = 0.5225, lr_0 = 1.2164e-04
Loss = 3.9081e-02, PNorm = 71.4008, GNorm = 0.5451, lr_0 = 1.2156e-04
Loss = 3.5785e-02, PNorm = 71.4031, GNorm = 0.6993, lr_0 = 1.2148e-04
Loss = 3.5802e-02, PNorm = 71.4056, GNorm = 0.4436, lr_0 = 1.2139e-04
Loss = 4.4022e-02, PNorm = 71.4075, GNorm = 0.5755, lr_0 = 1.2131e-04
Loss = 3.8073e-02, PNorm = 71.4095, GNorm = 0.4923, lr_0 = 1.2123e-04
Loss = 3.0338e-02, PNorm = 71.4105, GNorm = 0.4493, lr_0 = 1.2114e-04
Loss = 2.8358e-02, PNorm = 71.4111, GNorm = 0.3772, lr_0 = 1.2106e-04
Loss = 3.5232e-02, PNorm = 71.4121, GNorm = 0.6230, lr_0 = 1.2098e-04
Loss = 3.3055e-02, PNorm = 71.4140, GNorm = 0.3722, lr_0 = 1.2090e-04
Loss = 3.0340e-02, PNorm = 71.4159, GNorm = 0.4885, lr_0 = 1.2081e-04
Loss = 4.0200e-02, PNorm = 71.4177, GNorm = 0.4436, lr_0 = 1.2073e-04
Loss = 3.3534e-02, PNorm = 71.4201, GNorm = 0.4276, lr_0 = 1.2065e-04
Loss = 3.6543e-02, PNorm = 71.4219, GNorm = 0.4199, lr_0 = 1.2056e-04
Loss = 3.1522e-02, PNorm = 71.4229, GNorm = 0.6844, lr_0 = 1.2048e-04
Loss = 3.0140e-02, PNorm = 71.4249, GNorm = 0.4781, lr_0 = 1.2040e-04
Loss = 3.2587e-02, PNorm = 71.4275, GNorm = 0.4150, lr_0 = 1.2032e-04
Loss = 3.6953e-02, PNorm = 71.4297, GNorm = 0.5685, lr_0 = 1.2023e-04
Loss = 3.3044e-02, PNorm = 71.4317, GNorm = 0.4890, lr_0 = 1.2015e-04
Loss = 3.7314e-02, PNorm = 71.4323, GNorm = 0.6334, lr_0 = 1.2007e-04
Loss = 3.3787e-02, PNorm = 71.4335, GNorm = 0.4122, lr_0 = 1.1999e-04
Loss = 3.5754e-02, PNorm = 71.4331, GNorm = 0.4234, lr_0 = 1.1991e-04
Loss = 2.9335e-02, PNorm = 71.4342, GNorm = 0.4647, lr_0 = 1.1982e-04
Loss = 3.3099e-02, PNorm = 71.4347, GNorm = 0.4275, lr_0 = 1.1974e-04
Loss = 3.6818e-02, PNorm = 71.4355, GNorm = 0.4302, lr_0 = 1.1966e-04
Loss = 3.4957e-02, PNorm = 71.4372, GNorm = 0.3662, lr_0 = 1.1958e-04
Loss = 4.1702e-02, PNorm = 71.4397, GNorm = 0.6512, lr_0 = 1.1950e-04
Loss = 3.0488e-02, PNorm = 71.4411, GNorm = 0.4000, lr_0 = 1.1941e-04
Loss = 3.7237e-02, PNorm = 71.4423, GNorm = 0.5286, lr_0 = 1.1933e-04
Loss = 3.1695e-02, PNorm = 71.4439, GNorm = 0.3378, lr_0 = 1.1925e-04
Loss = 3.2765e-02, PNorm = 71.4456, GNorm = 0.9203, lr_0 = 1.1917e-04
Loss = 3.3350e-02, PNorm = 71.4467, GNorm = 0.4649, lr_0 = 1.1909e-04
Loss = 3.4574e-02, PNorm = 71.4493, GNorm = 0.6174, lr_0 = 1.1901e-04
Loss = 2.9676e-02, PNorm = 71.4516, GNorm = 0.5079, lr_0 = 1.1892e-04
Loss = 3.2473e-02, PNorm = 71.4528, GNorm = 0.3973, lr_0 = 1.1884e-04
Loss = 3.3658e-02, PNorm = 71.4535, GNorm = 0.4231, lr_0 = 1.1876e-04
Loss = 3.6868e-02, PNorm = 71.4547, GNorm = 0.4746, lr_0 = 1.1868e-04
Loss = 3.4632e-02, PNorm = 71.4560, GNorm = 0.4238, lr_0 = 1.1860e-04
Loss = 2.8269e-02, PNorm = 71.4577, GNorm = 0.3639, lr_0 = 1.1852e-04
Loss = 3.7722e-02, PNorm = 71.4593, GNorm = 0.5871, lr_0 = 1.1844e-04
Loss = 4.2307e-02, PNorm = 71.4613, GNorm = 0.5612, lr_0 = 1.1835e-04
Loss = 3.3106e-02, PNorm = 71.4639, GNorm = 0.3942, lr_0 = 1.1827e-04
Loss = 3.9021e-02, PNorm = 71.4654, GNorm = 0.4433, lr_0 = 1.1819e-04
Loss = 4.0719e-02, PNorm = 71.4659, GNorm = 0.5726, lr_0 = 1.1811e-04
Loss = 3.5270e-02, PNorm = 71.4669, GNorm = 0.3966, lr_0 = 1.1803e-04
Loss = 3.1828e-02, PNorm = 71.4683, GNorm = 0.4692, lr_0 = 1.1795e-04
Loss = 3.4913e-02, PNorm = 71.4693, GNorm = 0.4702, lr_0 = 1.1787e-04
Validation mae = 0.389945
Epoch 28
Loss = 2.9397e-02, PNorm = 71.4710, GNorm = 0.6407, lr_0 = 1.1779e-04
Loss = 2.8791e-02, PNorm = 71.4737, GNorm = 0.6946, lr_0 = 1.1771e-04
Loss = 3.0054e-02, PNorm = 71.4755, GNorm = 0.6460, lr_0 = 1.1763e-04
Loss = 3.0745e-02, PNorm = 71.4774, GNorm = 0.5572, lr_0 = 1.1755e-04
Loss = 2.7852e-02, PNorm = 71.4799, GNorm = 0.7317, lr_0 = 1.1747e-04
Loss = 2.6420e-02, PNorm = 71.4819, GNorm = 0.3759, lr_0 = 1.1739e-04
Loss = 3.1456e-02, PNorm = 71.4838, GNorm = 0.5375, lr_0 = 1.1730e-04
Loss = 3.0029e-02, PNorm = 71.4853, GNorm = 0.3797, lr_0 = 1.1722e-04
Loss = 2.8106e-02, PNorm = 71.4876, GNorm = 0.4006, lr_0 = 1.1714e-04
Loss = 2.7262e-02, PNorm = 71.4900, GNorm = 0.4095, lr_0 = 1.1706e-04
Loss = 3.5240e-02, PNorm = 71.4921, GNorm = 0.5068, lr_0 = 1.1698e-04
Loss = 3.3093e-02, PNorm = 71.4928, GNorm = 0.5983, lr_0 = 1.1690e-04
Loss = 2.8360e-02, PNorm = 71.4936, GNorm = 0.4525, lr_0 = 1.1682e-04
Loss = 3.2492e-02, PNorm = 71.4955, GNorm = 0.4379, lr_0 = 1.1674e-04
Loss = 2.8907e-02, PNorm = 71.4980, GNorm = 0.4794, lr_0 = 1.1666e-04
Loss = 2.7479e-02, PNorm = 71.5000, GNorm = 0.3731, lr_0 = 1.1658e-04
Loss = 2.4539e-02, PNorm = 71.5016, GNorm = 0.3751, lr_0 = 1.1650e-04
Loss = 3.2526e-02, PNorm = 71.5025, GNorm = 0.4212, lr_0 = 1.1642e-04
Loss = 3.8647e-02, PNorm = 71.5045, GNorm = 0.6283, lr_0 = 1.1634e-04
Loss = 2.9848e-02, PNorm = 71.5066, GNorm = 0.4270, lr_0 = 1.1626e-04
Loss = 2.6801e-02, PNorm = 71.5079, GNorm = 0.5089, lr_0 = 1.1618e-04
Loss = 3.1176e-02, PNorm = 71.5083, GNorm = 0.6929, lr_0 = 1.1611e-04
Loss = 2.8653e-02, PNorm = 71.5093, GNorm = 0.4851, lr_0 = 1.1603e-04
Loss = 3.1779e-02, PNorm = 71.5106, GNorm = 0.4976, lr_0 = 1.1595e-04
Loss = 2.9013e-02, PNorm = 71.5128, GNorm = 0.4262, lr_0 = 1.1587e-04
Loss = 3.1695e-02, PNorm = 71.5143, GNorm = 0.6169, lr_0 = 1.1579e-04
Loss = 3.2855e-02, PNorm = 71.5167, GNorm = 0.5179, lr_0 = 1.1571e-04
Loss = 2.9183e-02, PNorm = 71.5189, GNorm = 0.5096, lr_0 = 1.1563e-04
Loss = 3.1616e-02, PNorm = 71.5201, GNorm = 0.3886, lr_0 = 1.1555e-04
Loss = 3.1888e-02, PNorm = 71.5216, GNorm = 0.3688, lr_0 = 1.1547e-04
Loss = 3.2308e-02, PNorm = 71.5223, GNorm = 0.4605, lr_0 = 1.1539e-04
Loss = 2.9352e-02, PNorm = 71.5235, GNorm = 0.6190, lr_0 = 1.1531e-04
Loss = 3.1890e-02, PNorm = 71.5250, GNorm = 0.4245, lr_0 = 1.1523e-04
Loss = 2.4172e-02, PNorm = 71.5265, GNorm = 0.3534, lr_0 = 1.1515e-04
Loss = 2.7313e-02, PNorm = 71.5281, GNorm = 0.2982, lr_0 = 1.1508e-04
Loss = 3.0292e-02, PNorm = 71.5285, GNorm = 0.4603, lr_0 = 1.1500e-04
Loss = 3.1904e-02, PNorm = 71.5295, GNorm = 0.4535, lr_0 = 1.1492e-04
Loss = 2.9180e-02, PNorm = 71.5308, GNorm = 0.6440, lr_0 = 1.1484e-04
Loss = 2.8197e-02, PNorm = 71.5326, GNorm = 0.4886, lr_0 = 1.1476e-04
Loss = 3.1018e-02, PNorm = 71.5345, GNorm = 0.4338, lr_0 = 1.1468e-04
Loss = 3.2927e-02, PNorm = 71.5359, GNorm = 0.4736, lr_0 = 1.1460e-04
Loss = 3.3712e-02, PNorm = 71.5374, GNorm = 0.4540, lr_0 = 1.1452e-04
Loss = 3.4344e-02, PNorm = 71.5394, GNorm = 0.4973, lr_0 = 1.1445e-04
Loss = 3.2444e-02, PNorm = 71.5412, GNorm = 0.4369, lr_0 = 1.1437e-04
Loss = 2.9705e-02, PNorm = 71.5429, GNorm = 0.5618, lr_0 = 1.1429e-04
Loss = 3.1501e-02, PNorm = 71.5443, GNorm = 0.5639, lr_0 = 1.1421e-04
Loss = 2.9312e-02, PNorm = 71.5458, GNorm = 0.5155, lr_0 = 1.1413e-04
Loss = 2.8131e-02, PNorm = 71.5471, GNorm = 0.5421, lr_0 = 1.1405e-04
Loss = 3.3560e-02, PNorm = 71.5490, GNorm = 0.5340, lr_0 = 1.1398e-04
Loss = 3.0711e-02, PNorm = 71.5509, GNorm = 0.3954, lr_0 = 1.1390e-04
Loss = 3.1561e-02, PNorm = 71.5524, GNorm = 0.4702, lr_0 = 1.1382e-04
Loss = 3.0388e-02, PNorm = 71.5540, GNorm = 0.5411, lr_0 = 1.1374e-04
Loss = 3.1115e-02, PNorm = 71.5558, GNorm = 0.4475, lr_0 = 1.1366e-04
Loss = 3.5685e-02, PNorm = 71.5572, GNorm = 0.5755, lr_0 = 1.1359e-04
Loss = 3.4910e-02, PNorm = 71.5580, GNorm = 0.7117, lr_0 = 1.1351e-04
Loss = 3.2695e-02, PNorm = 71.5596, GNorm = 0.6240, lr_0 = 1.1343e-04
Loss = 4.1662e-02, PNorm = 71.5608, GNorm = 0.7734, lr_0 = 1.1335e-04
Loss = 3.3975e-02, PNorm = 71.5639, GNorm = 0.4331, lr_0 = 1.1328e-04
Loss = 3.1385e-02, PNorm = 71.5661, GNorm = 0.3942, lr_0 = 1.1320e-04
Loss = 3.3872e-02, PNorm = 71.5676, GNorm = 0.5007, lr_0 = 1.1312e-04
Loss = 2.9881e-02, PNorm = 71.5690, GNorm = 0.5128, lr_0 = 1.1304e-04
Loss = 2.9116e-02, PNorm = 71.5700, GNorm = 0.3107, lr_0 = 1.1297e-04
Loss = 3.3278e-02, PNorm = 71.5720, GNorm = 0.4548, lr_0 = 1.1289e-04
Loss = 2.7662e-02, PNorm = 71.5727, GNorm = 0.3775, lr_0 = 1.1281e-04
Loss = 2.9672e-02, PNorm = 71.5734, GNorm = 0.5404, lr_0 = 1.1273e-04
Loss = 2.7340e-02, PNorm = 71.5753, GNorm = 0.3693, lr_0 = 1.1266e-04
Loss = 3.2486e-02, PNorm = 71.5768, GNorm = 0.5339, lr_0 = 1.1258e-04
Loss = 2.6784e-02, PNorm = 71.5787, GNorm = 0.4073, lr_0 = 1.1250e-04
Loss = 3.0649e-02, PNorm = 71.5800, GNorm = 0.4345, lr_0 = 1.1243e-04
Loss = 2.7817e-02, PNorm = 71.5810, GNorm = 0.3919, lr_0 = 1.1235e-04
Loss = 3.0844e-02, PNorm = 71.5819, GNorm = 0.6165, lr_0 = 1.1227e-04
Loss = 3.3933e-02, PNorm = 71.5829, GNorm = 0.6129, lr_0 = 1.1219e-04
Loss = 2.9455e-02, PNorm = 71.5840, GNorm = 0.3850, lr_0 = 1.1212e-04
Loss = 3.5427e-02, PNorm = 71.5846, GNorm = 0.5362, lr_0 = 1.1204e-04
Loss = 3.1250e-02, PNorm = 71.5860, GNorm = 0.3319, lr_0 = 1.1196e-04
Loss = 3.3409e-02, PNorm = 71.5872, GNorm = 0.6362, lr_0 = 1.1189e-04
Loss = 3.8604e-02, PNorm = 71.5876, GNorm = 0.9624, lr_0 = 1.1181e-04
Loss = 3.1492e-02, PNorm = 71.5887, GNorm = 0.6200, lr_0 = 1.1173e-04
Loss = 3.9328e-02, PNorm = 71.5904, GNorm = 0.5155, lr_0 = 1.1166e-04
Loss = 2.9392e-02, PNorm = 71.5912, GNorm = 0.5676, lr_0 = 1.1158e-04
Loss = 3.4044e-02, PNorm = 71.5920, GNorm = 0.5220, lr_0 = 1.1150e-04
Loss = 2.8745e-02, PNorm = 71.5928, GNorm = 0.5783, lr_0 = 1.1143e-04
Loss = 3.2215e-02, PNorm = 71.5942, GNorm = 0.7091, lr_0 = 1.1135e-04
Loss = 2.8650e-02, PNorm = 71.5953, GNorm = 0.5310, lr_0 = 1.1128e-04
Loss = 3.6252e-02, PNorm = 71.5962, GNorm = 0.4763, lr_0 = 1.1120e-04
Loss = 3.8083e-02, PNorm = 71.5980, GNorm = 0.4725, lr_0 = 1.1112e-04
Loss = 3.1279e-02, PNorm = 71.5995, GNorm = 0.4625, lr_0 = 1.1105e-04
Loss = 3.0086e-02, PNorm = 71.6012, GNorm = 0.4030, lr_0 = 1.1097e-04
Loss = 3.1211e-02, PNorm = 71.6026, GNorm = 0.4041, lr_0 = 1.1089e-04
Loss = 3.1647e-02, PNorm = 71.6037, GNorm = 0.6374, lr_0 = 1.1082e-04
Loss = 3.3633e-02, PNorm = 71.6044, GNorm = 0.4076, lr_0 = 1.1074e-04
Loss = 3.4660e-02, PNorm = 71.6063, GNorm = 0.4160, lr_0 = 1.1067e-04
Loss = 3.4127e-02, PNorm = 71.6083, GNorm = 0.4966, lr_0 = 1.1059e-04
Loss = 3.2622e-02, PNorm = 71.6102, GNorm = 0.5158, lr_0 = 1.1052e-04
Loss = 3.3953e-02, PNorm = 71.6109, GNorm = 0.4329, lr_0 = 1.1044e-04
Loss = 2.9506e-02, PNorm = 71.6121, GNorm = 0.4956, lr_0 = 1.1036e-04
Loss = 3.5440e-02, PNorm = 71.6120, GNorm = 0.7149, lr_0 = 1.1029e-04
Loss = 3.6196e-02, PNorm = 71.6139, GNorm = 0.4213, lr_0 = 1.1021e-04
Loss = 2.8790e-02, PNorm = 71.6162, GNorm = 0.5476, lr_0 = 1.1014e-04
Loss = 2.7446e-02, PNorm = 71.6175, GNorm = 0.6019, lr_0 = 1.1006e-04
Loss = 3.2149e-02, PNorm = 71.6177, GNorm = 0.7229, lr_0 = 1.0999e-04
Loss = 3.4941e-02, PNorm = 71.6179, GNorm = 0.4598, lr_0 = 1.0991e-04
Loss = 3.4836e-02, PNorm = 71.6188, GNorm = 0.4564, lr_0 = 1.0984e-04
Loss = 3.4077e-02, PNorm = 71.6200, GNorm = 0.5969, lr_0 = 1.0976e-04
Loss = 3.0254e-02, PNorm = 71.6216, GNorm = 0.4622, lr_0 = 1.0969e-04
Loss = 3.3001e-02, PNorm = 71.6226, GNorm = 0.4355, lr_0 = 1.0961e-04
Loss = 3.2501e-02, PNorm = 71.6239, GNorm = 0.3668, lr_0 = 1.0954e-04
Loss = 3.3441e-02, PNorm = 71.6258, GNorm = 0.5792, lr_0 = 1.0946e-04
Loss = 3.1020e-02, PNorm = 71.6274, GNorm = 0.4560, lr_0 = 1.0939e-04
Loss = 3.2576e-02, PNorm = 71.6289, GNorm = 0.4389, lr_0 = 1.0931e-04
Loss = 4.0225e-02, PNorm = 71.6313, GNorm = 0.3908, lr_0 = 1.0924e-04
Loss = 3.4460e-02, PNorm = 71.6336, GNorm = 0.4680, lr_0 = 1.0916e-04
Loss = 3.4568e-02, PNorm = 71.6362, GNorm = 0.4503, lr_0 = 1.0909e-04
Loss = 3.3359e-02, PNorm = 71.6372, GNorm = 0.4473, lr_0 = 1.0901e-04
Loss = 3.3559e-02, PNorm = 71.6383, GNorm = 0.6019, lr_0 = 1.0894e-04
Loss = 2.8976e-02, PNorm = 71.6402, GNorm = 0.4455, lr_0 = 1.0886e-04
Loss = 3.0961e-02, PNorm = 71.6410, GNorm = 0.3640, lr_0 = 1.0879e-04
Loss = 3.5335e-02, PNorm = 71.6419, GNorm = 0.4571, lr_0 = 1.0871e-04
Loss = 2.9629e-02, PNorm = 71.6429, GNorm = 0.4304, lr_0 = 1.0864e-04
Loss = 2.7395e-02, PNorm = 71.6446, GNorm = 0.4706, lr_0 = 1.0856e-04
Validation mae = 0.392444
Epoch 29
Loss = 3.1166e-02, PNorm = 71.6454, GNorm = 0.4007, lr_0 = 1.0849e-04
Loss = 2.3453e-02, PNorm = 71.6465, GNorm = 0.3851, lr_0 = 1.0841e-04
Loss = 2.7060e-02, PNorm = 71.6480, GNorm = 0.4311, lr_0 = 1.0834e-04
Loss = 2.7694e-02, PNorm = 71.6497, GNorm = 0.4163, lr_0 = 1.0827e-04
Loss = 2.7520e-02, PNorm = 71.6513, GNorm = 0.4103, lr_0 = 1.0819e-04
Loss = 3.1671e-02, PNorm = 71.6535, GNorm = 0.5002, lr_0 = 1.0812e-04
Loss = 2.6990e-02, PNorm = 71.6554, GNorm = 0.4958, lr_0 = 1.0804e-04
Loss = 3.0345e-02, PNorm = 71.6573, GNorm = 0.4243, lr_0 = 1.0797e-04
Loss = 3.0129e-02, PNorm = 71.6591, GNorm = 0.3761, lr_0 = 1.0790e-04
Loss = 2.8319e-02, PNorm = 71.6611, GNorm = 0.5661, lr_0 = 1.0782e-04
Loss = 2.4773e-02, PNorm = 71.6629, GNorm = 0.4677, lr_0 = 1.0775e-04
Loss = 2.6871e-02, PNorm = 71.6638, GNorm = 0.3542, lr_0 = 1.0767e-04
Loss = 2.9633e-02, PNorm = 71.6645, GNorm = 0.4408, lr_0 = 1.0760e-04
Loss = 2.9280e-02, PNorm = 71.6649, GNorm = 0.4426, lr_0 = 1.0753e-04
Loss = 3.0555e-02, PNorm = 71.6665, GNorm = 0.3431, lr_0 = 1.0745e-04
Loss = 2.6439e-02, PNorm = 71.6683, GNorm = 0.3654, lr_0 = 1.0738e-04
Loss = 2.7751e-02, PNorm = 71.6701, GNorm = 0.5525, lr_0 = 1.0731e-04
Loss = 3.0089e-02, PNorm = 71.6715, GNorm = 0.4781, lr_0 = 1.0723e-04
Loss = 2.8151e-02, PNorm = 71.6724, GNorm = 0.5223, lr_0 = 1.0716e-04
Loss = 2.6446e-02, PNorm = 71.6739, GNorm = 0.4407, lr_0 = 1.0709e-04
Loss = 3.0027e-02, PNorm = 71.6741, GNorm = 0.5251, lr_0 = 1.0701e-04
Loss = 2.9750e-02, PNorm = 71.6753, GNorm = 0.4084, lr_0 = 1.0694e-04
Loss = 2.8430e-02, PNorm = 71.6775, GNorm = 0.4037, lr_0 = 1.0687e-04
Loss = 2.7088e-02, PNorm = 71.6798, GNorm = 0.3834, lr_0 = 1.0679e-04
Loss = 3.2904e-02, PNorm = 71.6828, GNorm = 0.5378, lr_0 = 1.0672e-04
Loss = 2.7114e-02, PNorm = 71.6842, GNorm = 0.5509, lr_0 = 1.0665e-04
Loss = 2.9150e-02, PNorm = 71.6859, GNorm = 0.6027, lr_0 = 1.0657e-04
Loss = 2.8186e-02, PNorm = 71.6875, GNorm = 0.5373, lr_0 = 1.0650e-04
Loss = 3.0671e-02, PNorm = 71.6900, GNorm = 0.3540, lr_0 = 1.0643e-04
Loss = 3.0426e-02, PNorm = 71.6916, GNorm = 0.4605, lr_0 = 1.0635e-04
Loss = 3.1470e-02, PNorm = 71.6940, GNorm = 0.4650, lr_0 = 1.0628e-04
Loss = 3.4524e-02, PNorm = 71.6958, GNorm = 0.5589, lr_0 = 1.0621e-04
Loss = 3.0474e-02, PNorm = 71.6973, GNorm = 0.3930, lr_0 = 1.0614e-04
Loss = 2.9405e-02, PNorm = 71.6983, GNorm = 0.3619, lr_0 = 1.0606e-04
Loss = 3.3319e-02, PNorm = 71.6997, GNorm = 0.5554, lr_0 = 1.0599e-04
Loss = 3.1304e-02, PNorm = 71.7017, GNorm = 0.4351, lr_0 = 1.0592e-04
Loss = 2.6101e-02, PNorm = 71.7026, GNorm = 0.4883, lr_0 = 1.0585e-04
Loss = 2.6621e-02, PNorm = 71.7036, GNorm = 0.4107, lr_0 = 1.0577e-04
Loss = 3.2241e-02, PNorm = 71.7046, GNorm = 0.5291, lr_0 = 1.0570e-04
Loss = 2.4852e-02, PNorm = 71.7058, GNorm = 0.6595, lr_0 = 1.0563e-04
Loss = 3.0211e-02, PNorm = 71.7068, GNorm = 0.7227, lr_0 = 1.0556e-04
Loss = 3.1323e-02, PNorm = 71.7081, GNorm = 0.4197, lr_0 = 1.0548e-04
Loss = 2.9283e-02, PNorm = 71.7097, GNorm = 0.5553, lr_0 = 1.0541e-04
Loss = 2.5626e-02, PNorm = 71.7104, GNorm = 0.4836, lr_0 = 1.0534e-04
Loss = 3.0775e-02, PNorm = 71.7111, GNorm = 0.4582, lr_0 = 1.0527e-04
Loss = 2.6056e-02, PNorm = 71.7127, GNorm = 0.4209, lr_0 = 1.0519e-04
Loss = 3.6362e-02, PNorm = 71.7140, GNorm = 0.6700, lr_0 = 1.0512e-04
Loss = 2.8889e-02, PNorm = 71.7161, GNorm = 0.5767, lr_0 = 1.0505e-04
Loss = 2.8799e-02, PNorm = 71.7177, GNorm = 0.5238, lr_0 = 1.0498e-04
Loss = 2.6132e-02, PNorm = 71.7183, GNorm = 0.5985, lr_0 = 1.0491e-04
Loss = 2.6565e-02, PNorm = 71.7195, GNorm = 0.4527, lr_0 = 1.0483e-04
Loss = 3.1371e-02, PNorm = 71.7215, GNorm = 0.6049, lr_0 = 1.0476e-04
Loss = 2.8843e-02, PNorm = 71.7228, GNorm = 0.4674, lr_0 = 1.0469e-04
Loss = 3.0078e-02, PNorm = 71.7237, GNorm = 0.4618, lr_0 = 1.0462e-04
Loss = 2.7281e-02, PNorm = 71.7241, GNorm = 0.4002, lr_0 = 1.0455e-04
Loss = 2.7465e-02, PNorm = 71.7245, GNorm = 0.4014, lr_0 = 1.0448e-04
Loss = 3.1533e-02, PNorm = 71.7251, GNorm = 0.5441, lr_0 = 1.0440e-04
Loss = 3.2836e-02, PNorm = 71.7267, GNorm = 0.5084, lr_0 = 1.0433e-04
Loss = 3.1755e-02, PNorm = 71.7292, GNorm = 0.4283, lr_0 = 1.0426e-04
Loss = 3.2250e-02, PNorm = 71.7314, GNorm = 0.6533, lr_0 = 1.0419e-04
Loss = 2.7894e-02, PNorm = 71.7331, GNorm = 0.4443, lr_0 = 1.0412e-04
Loss = 3.5250e-02, PNorm = 71.7353, GNorm = 0.4027, lr_0 = 1.0405e-04
Loss = 2.9111e-02, PNorm = 71.7370, GNorm = 0.6784, lr_0 = 1.0398e-04
Loss = 2.7143e-02, PNorm = 71.7381, GNorm = 0.4223, lr_0 = 1.0391e-04
Loss = 3.5914e-02, PNorm = 71.7394, GNorm = 0.5071, lr_0 = 1.0383e-04
Loss = 3.6667e-02, PNorm = 71.7403, GNorm = 0.4244, lr_0 = 1.0376e-04
Loss = 2.8996e-02, PNorm = 71.7416, GNorm = 0.3107, lr_0 = 1.0369e-04
Loss = 3.0384e-02, PNorm = 71.7434, GNorm = 0.5313, lr_0 = 1.0362e-04
Loss = 3.1511e-02, PNorm = 71.7444, GNorm = 0.9589, lr_0 = 1.0355e-04
Loss = 2.6595e-02, PNorm = 71.7453, GNorm = 0.4582, lr_0 = 1.0348e-04
Loss = 2.8714e-02, PNorm = 71.7456, GNorm = 0.5366, lr_0 = 1.0341e-04
Loss = 3.2369e-02, PNorm = 71.7468, GNorm = 0.4408, lr_0 = 1.0334e-04
Loss = 3.1250e-02, PNorm = 71.7483, GNorm = 0.4301, lr_0 = 1.0327e-04
Loss = 3.2249e-02, PNorm = 71.7500, GNorm = 0.4414, lr_0 = 1.0320e-04
Loss = 3.2994e-02, PNorm = 71.7506, GNorm = 0.5136, lr_0 = 1.0312e-04
Loss = 2.6538e-02, PNorm = 71.7519, GNorm = 0.4393, lr_0 = 1.0305e-04
Loss = 2.5874e-02, PNorm = 71.7532, GNorm = 0.3959, lr_0 = 1.0298e-04
Loss = 2.4568e-02, PNorm = 71.7546, GNorm = 0.3501, lr_0 = 1.0291e-04
Loss = 2.7010e-02, PNorm = 71.7549, GNorm = 0.4887, lr_0 = 1.0284e-04
Loss = 3.0612e-02, PNorm = 71.7551, GNorm = 0.4681, lr_0 = 1.0277e-04
Loss = 2.7008e-02, PNorm = 71.7556, GNorm = 0.3180, lr_0 = 1.0270e-04
Loss = 3.4696e-02, PNorm = 71.7562, GNorm = 0.6383, lr_0 = 1.0263e-04
Loss = 2.4161e-02, PNorm = 71.7574, GNorm = 0.4289, lr_0 = 1.0256e-04
Loss = 2.9400e-02, PNorm = 71.7583, GNorm = 0.4178, lr_0 = 1.0249e-04
Loss = 3.2349e-02, PNorm = 71.7592, GNorm = 0.4909, lr_0 = 1.0242e-04
Loss = 3.1912e-02, PNorm = 71.7608, GNorm = 0.4259, lr_0 = 1.0235e-04
Loss = 3.1959e-02, PNorm = 71.7616, GNorm = 0.4473, lr_0 = 1.0228e-04
Loss = 3.0676e-02, PNorm = 71.7630, GNorm = 0.5576, lr_0 = 1.0221e-04
Loss = 3.0388e-02, PNorm = 71.7645, GNorm = 0.6800, lr_0 = 1.0214e-04
Loss = 2.9639e-02, PNorm = 71.7665, GNorm = 0.6103, lr_0 = 1.0207e-04
Loss = 3.5104e-02, PNorm = 71.7682, GNorm = 0.4252, lr_0 = 1.0200e-04
Loss = 2.8137e-02, PNorm = 71.7699, GNorm = 0.4059, lr_0 = 1.0193e-04
Loss = 3.3610e-02, PNorm = 71.7713, GNorm = 0.4614, lr_0 = 1.0186e-04
Loss = 3.2444e-02, PNorm = 71.7729, GNorm = 0.4409, lr_0 = 1.0179e-04
Loss = 2.9092e-02, PNorm = 71.7745, GNorm = 0.4803, lr_0 = 1.0172e-04
Loss = 2.8763e-02, PNorm = 71.7757, GNorm = 0.5517, lr_0 = 1.0165e-04
Loss = 3.1301e-02, PNorm = 71.7771, GNorm = 0.3564, lr_0 = 1.0158e-04
Loss = 3.3464e-02, PNorm = 71.7781, GNorm = 0.5314, lr_0 = 1.0151e-04
Loss = 3.0329e-02, PNorm = 71.7795, GNorm = 0.4890, lr_0 = 1.0144e-04
Loss = 3.1440e-02, PNorm = 71.7818, GNorm = 0.3949, lr_0 = 1.0137e-04
Loss = 3.1378e-02, PNorm = 71.7829, GNorm = 0.3606, lr_0 = 1.0130e-04
Loss = 3.3189e-02, PNorm = 71.7838, GNorm = 0.4245, lr_0 = 1.0123e-04
Loss = 3.1956e-02, PNorm = 71.7855, GNorm = 0.4691, lr_0 = 1.0116e-04
Loss = 3.0618e-02, PNorm = 71.7872, GNorm = 0.4346, lr_0 = 1.0110e-04
Loss = 3.1778e-02, PNorm = 71.7887, GNorm = 0.5848, lr_0 = 1.0103e-04
Loss = 3.1279e-02, PNorm = 71.7901, GNorm = 0.4173, lr_0 = 1.0096e-04
Loss = 3.2028e-02, PNorm = 71.7911, GNorm = 0.5805, lr_0 = 1.0089e-04
Loss = 3.2170e-02, PNorm = 71.7919, GNorm = 0.3775, lr_0 = 1.0082e-04
Loss = 3.1698e-02, PNorm = 71.7934, GNorm = 0.7357, lr_0 = 1.0075e-04
Loss = 3.8902e-02, PNorm = 71.7957, GNorm = 0.4896, lr_0 = 1.0068e-04
Loss = 3.0981e-02, PNorm = 71.7971, GNorm = 0.6908, lr_0 = 1.0061e-04
Loss = 2.7166e-02, PNorm = 71.7983, GNorm = 0.3487, lr_0 = 1.0054e-04
Loss = 2.9963e-02, PNorm = 71.7992, GNorm = 0.4205, lr_0 = 1.0047e-04
Loss = 2.4970e-02, PNorm = 71.8002, GNorm = 0.5086, lr_0 = 1.0041e-04
Loss = 3.1227e-02, PNorm = 71.8015, GNorm = 0.4690, lr_0 = 1.0034e-04
Loss = 3.0708e-02, PNorm = 71.8028, GNorm = 0.4859, lr_0 = 1.0027e-04
Loss = 3.5379e-02, PNorm = 71.8043, GNorm = 0.5262, lr_0 = 1.0020e-04
Loss = 3.2328e-02, PNorm = 71.8052, GNorm = 0.6242, lr_0 = 1.0013e-04
Loss = 3.7367e-02, PNorm = 71.8061, GNorm = 0.7541, lr_0 = 1.0006e-04
Loss = 2.9713e-02, PNorm = 71.8071, GNorm = 0.4694, lr_0 = 1.0000e-04
Validation mae = 0.392102
Model 0 best validation mae = 0.385387 on epoch 20
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.381854
Ensemble test mae = 0.381854
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 1.0540e+00, PNorm = 38.3713, GNorm = 1.6012, lr_0 = 1.0413e-04
Loss = 8.5177e-01, PNorm = 38.3741, GNorm = 9.0969, lr_0 = 1.0788e-04
Loss = 8.0228e-01, PNorm = 38.3770, GNorm = 4.3072, lr_0 = 1.1163e-04
Loss = 7.7886e-01, PNorm = 38.3800, GNorm = 7.4847, lr_0 = 1.1537e-04
Loss = 7.3316e-01, PNorm = 38.3837, GNorm = 11.0753, lr_0 = 1.1913e-04
Loss = 6.3749e-01, PNorm = 38.3880, GNorm = 1.8432, lr_0 = 1.2287e-04
Loss = 6.2542e-01, PNorm = 38.3929, GNorm = 3.9631, lr_0 = 1.2663e-04
Loss = 6.3742e-01, PNorm = 38.3979, GNorm = 8.7301, lr_0 = 1.3038e-04
Loss = 6.3119e-01, PNorm = 38.4026, GNorm = 8.2946, lr_0 = 1.3413e-04
Loss = 6.2337e-01, PNorm = 38.4077, GNorm = 4.9285, lr_0 = 1.3788e-04
Loss = 6.2608e-01, PNorm = 38.4145, GNorm = 19.6160, lr_0 = 1.4163e-04
Loss = 5.9593e-01, PNorm = 38.4209, GNorm = 5.8923, lr_0 = 1.4537e-04
Loss = 5.9579e-01, PNorm = 38.4254, GNorm = 6.8836, lr_0 = 1.4913e-04
Loss = 5.8727e-01, PNorm = 38.4317, GNorm = 11.5672, lr_0 = 1.5288e-04
Loss = 5.1701e-01, PNorm = 38.4384, GNorm = 4.0229, lr_0 = 1.5662e-04
Loss = 4.5755e-01, PNorm = 38.4440, GNorm = 10.2861, lr_0 = 1.6038e-04
Loss = 4.9459e-01, PNorm = 38.4483, GNorm = 7.9770, lr_0 = 1.6412e-04
Loss = 4.9864e-01, PNorm = 38.4536, GNorm = 9.4266, lr_0 = 1.6788e-04
Loss = 4.9357e-01, PNorm = 38.4592, GNorm = 1.4394, lr_0 = 1.7163e-04
Loss = 4.5026e-01, PNorm = 38.4649, GNorm = 5.6745, lr_0 = 1.7538e-04
Loss = 4.3001e-01, PNorm = 38.4709, GNorm = 19.4632, lr_0 = 1.7913e-04
Loss = 5.3973e-01, PNorm = 38.4760, GNorm = 20.8827, lr_0 = 1.8288e-04
Loss = 4.5358e-01, PNorm = 38.4812, GNorm = 8.6758, lr_0 = 1.8662e-04
Loss = 4.2385e-01, PNorm = 38.4879, GNorm = 1.6063, lr_0 = 1.9038e-04
Loss = 4.6218e-01, PNorm = 38.4945, GNorm = 3.3574, lr_0 = 1.9413e-04
Loss = 4.3467e-01, PNorm = 38.5010, GNorm = 5.8297, lr_0 = 1.9788e-04
Loss = 3.7177e-01, PNorm = 38.5066, GNorm = 13.1177, lr_0 = 2.0163e-04
Loss = 4.1285e-01, PNorm = 38.5124, GNorm = 34.6007, lr_0 = 2.0537e-04
Loss = 5.2018e-01, PNorm = 38.5151, GNorm = 28.3073, lr_0 = 2.0913e-04
Loss = 5.1616e-01, PNorm = 38.5190, GNorm = 15.5756, lr_0 = 2.1288e-04
Loss = 4.7411e-01, PNorm = 38.5249, GNorm = 10.7122, lr_0 = 2.1663e-04
Loss = 3.9581e-01, PNorm = 38.5318, GNorm = 1.8922, lr_0 = 2.2038e-04
Loss = 4.0792e-01, PNorm = 38.5393, GNorm = 3.7818, lr_0 = 2.2412e-04
Loss = 3.5411e-01, PNorm = 38.5474, GNorm = 1.5441, lr_0 = 2.2787e-04
Loss = 3.8785e-01, PNorm = 38.5545, GNorm = 2.0521, lr_0 = 2.3163e-04
Loss = 3.1922e-01, PNorm = 38.5626, GNorm = 4.2228, lr_0 = 2.3538e-04
Loss = 3.3277e-01, PNorm = 38.5700, GNorm = 20.2144, lr_0 = 2.3913e-04
Loss = 3.3205e-01, PNorm = 38.5748, GNorm = 16.4035, lr_0 = 2.4288e-04
Loss = 3.8172e-01, PNorm = 38.5806, GNorm = 18.2272, lr_0 = 2.4662e-04
Loss = 3.6461e-01, PNorm = 38.5873, GNorm = 6.9785, lr_0 = 2.5038e-04
Loss = 3.6715e-01, PNorm = 38.5964, GNorm = 4.0602, lr_0 = 2.5413e-04
Loss = 3.3398e-01, PNorm = 38.6057, GNorm = 6.8903, lr_0 = 2.5788e-04
Loss = 2.9479e-01, PNorm = 38.6125, GNorm = 14.3131, lr_0 = 2.6163e-04
Loss = 3.2192e-01, PNorm = 38.6190, GNorm = 13.1238, lr_0 = 2.6537e-04
Loss = 3.2731e-01, PNorm = 38.6284, GNorm = 14.5033, lr_0 = 2.6912e-04
Loss = 3.2928e-01, PNorm = 38.6322, GNorm = 10.0142, lr_0 = 2.7288e-04
Loss = 3.6791e-01, PNorm = 38.6370, GNorm = 4.6417, lr_0 = 2.7663e-04
Loss = 3.1901e-01, PNorm = 38.6438, GNorm = 10.4403, lr_0 = 2.8038e-04
Loss = 3.4352e-01, PNorm = 38.6474, GNorm = 2.7302, lr_0 = 2.8413e-04
Loss = 3.0036e-01, PNorm = 38.6542, GNorm = 2.9583, lr_0 = 2.8787e-04
Loss = 3.3297e-01, PNorm = 38.6634, GNorm = 18.8171, lr_0 = 2.9163e-04
Loss = 3.0745e-01, PNorm = 38.6679, GNorm = 7.3136, lr_0 = 2.9538e-04
Loss = 3.3066e-01, PNorm = 38.6754, GNorm = 3.3008, lr_0 = 2.9913e-04
Loss = 3.5829e-01, PNorm = 38.6803, GNorm = 23.3775, lr_0 = 3.0288e-04
Loss = 3.8218e-01, PNorm = 38.6878, GNorm = 13.9343, lr_0 = 3.0662e-04
Loss = 3.9615e-01, PNorm = 38.6967, GNorm = 20.1204, lr_0 = 3.1037e-04
Loss = 3.3682e-01, PNorm = 38.7037, GNorm = 7.7371, lr_0 = 3.1413e-04
Loss = 2.9716e-01, PNorm = 38.7131, GNorm = 2.0854, lr_0 = 3.1788e-04
Loss = 3.2899e-01, PNorm = 38.7184, GNorm = 2.0620, lr_0 = 3.2163e-04
Loss = 3.0831e-01, PNorm = 38.7240, GNorm = 7.6701, lr_0 = 3.2538e-04
Loss = 3.0457e-01, PNorm = 38.7298, GNorm = 12.1496, lr_0 = 3.2912e-04
Loss = 3.2226e-01, PNorm = 38.7354, GNorm = 1.3855, lr_0 = 3.3288e-04
Loss = 4.2571e-01, PNorm = 38.7396, GNorm = 25.0355, lr_0 = 3.3663e-04
Loss = 3.2552e-01, PNorm = 38.7457, GNorm = 3.5052, lr_0 = 3.4038e-04
Loss = 3.2330e-01, PNorm = 38.7566, GNorm = 2.4888, lr_0 = 3.4413e-04
Loss = 3.1748e-01, PNorm = 38.7681, GNorm = 7.4408, lr_0 = 3.4787e-04
Loss = 2.9597e-01, PNorm = 38.7735, GNorm = 13.4645, lr_0 = 3.5162e-04
Loss = 2.9339e-01, PNorm = 38.7803, GNorm = 3.2388, lr_0 = 3.5538e-04
Loss = 3.4697e-01, PNorm = 38.7897, GNorm = 18.3963, lr_0 = 3.5913e-04
Loss = 3.5850e-01, PNorm = 38.7963, GNorm = 4.1542, lr_0 = 3.6288e-04
Loss = 4.0806e-01, PNorm = 38.8063, GNorm = 9.4992, lr_0 = 3.6662e-04
Loss = 3.2773e-01, PNorm = 38.8166, GNorm = 11.8062, lr_0 = 3.7037e-04
Loss = 3.3796e-01, PNorm = 38.8262, GNorm = 7.9220, lr_0 = 3.7413e-04
Loss = 3.0619e-01, PNorm = 38.8336, GNorm = 5.5122, lr_0 = 3.7788e-04
Loss = 2.9965e-01, PNorm = 38.8409, GNorm = 15.7929, lr_0 = 3.8163e-04
Loss = 3.2058e-01, PNorm = 38.8496, GNorm = 1.7993, lr_0 = 3.8537e-04
Loss = 2.5897e-01, PNorm = 38.8598, GNorm = 7.5024, lr_0 = 3.8912e-04
Loss = 3.2769e-01, PNorm = 38.8670, GNorm = 5.9840, lr_0 = 3.9287e-04
Loss = 3.0326e-01, PNorm = 38.8743, GNorm = 3.8228, lr_0 = 3.9663e-04
Loss = 2.7785e-01, PNorm = 38.8877, GNorm = 8.9360, lr_0 = 4.0038e-04
Loss = 2.5620e-01, PNorm = 38.8970, GNorm = 1.2264, lr_0 = 4.0413e-04
Loss = 2.5802e-01, PNorm = 38.9066, GNorm = 3.7209, lr_0 = 4.0787e-04
Loss = 2.7299e-01, PNorm = 38.9132, GNorm = 2.9500, lr_0 = 4.1162e-04
Loss = 3.5475e-01, PNorm = 38.9231, GNorm = 12.5552, lr_0 = 4.1537e-04
Loss = 2.9020e-01, PNorm = 38.9279, GNorm = 6.5900, lr_0 = 4.1913e-04
Loss = 3.7700e-01, PNorm = 38.9371, GNorm = 3.3620, lr_0 = 4.2288e-04
Loss = 2.8629e-01, PNorm = 38.9496, GNorm = 0.7169, lr_0 = 4.2662e-04
Loss = 3.0368e-01, PNorm = 38.9639, GNorm = 11.5860, lr_0 = 4.3037e-04
Loss = 3.2914e-01, PNorm = 38.9719, GNorm = 12.7855, lr_0 = 4.3412e-04
Loss = 3.2626e-01, PNorm = 38.9803, GNorm = 8.8595, lr_0 = 4.3788e-04
Loss = 3.6166e-01, PNorm = 38.9961, GNorm = 2.6010, lr_0 = 4.4163e-04
Loss = 3.5080e-01, PNorm = 39.0093, GNorm = 14.5817, lr_0 = 4.4538e-04
Loss = 3.1525e-01, PNorm = 39.0219, GNorm = 0.9978, lr_0 = 4.4912e-04
Loss = 3.1473e-01, PNorm = 39.0322, GNorm = 12.1564, lr_0 = 4.5287e-04
Loss = 3.2020e-01, PNorm = 39.0437, GNorm = 10.1591, lr_0 = 4.5662e-04
Loss = 2.6493e-01, PNorm = 39.0570, GNorm = 6.0708, lr_0 = 4.6038e-04
Loss = 2.8502e-01, PNorm = 39.0701, GNorm = 6.0984, lr_0 = 4.6413e-04
Loss = 2.9033e-01, PNorm = 39.0753, GNorm = 11.9566, lr_0 = 4.6787e-04
Loss = 2.6753e-01, PNorm = 39.0841, GNorm = 4.1300, lr_0 = 4.7162e-04
Loss = 2.4121e-01, PNorm = 39.0967, GNorm = 15.4428, lr_0 = 4.7537e-04
Loss = 3.5502e-01, PNorm = 39.1083, GNorm = 13.2641, lr_0 = 4.7913e-04
Loss = 3.5416e-01, PNorm = 39.1236, GNorm = 10.4036, lr_0 = 4.8288e-04
Loss = 2.6523e-01, PNorm = 39.1350, GNorm = 3.9579, lr_0 = 4.8663e-04
Loss = 2.9319e-01, PNorm = 39.1517, GNorm = 1.1712, lr_0 = 4.9038e-04
Loss = 2.5398e-01, PNorm = 39.1677, GNorm = 5.0905, lr_0 = 4.9412e-04
Loss = 2.5069e-01, PNorm = 39.1795, GNorm = 6.6415, lr_0 = 4.9788e-04
Loss = 3.2064e-01, PNorm = 39.1852, GNorm = 6.6360, lr_0 = 5.0163e-04
Loss = 2.7292e-01, PNorm = 39.2003, GNorm = 4.1338, lr_0 = 5.0538e-04
Loss = 2.6821e-01, PNorm = 39.2139, GNorm = 7.0438, lr_0 = 5.0913e-04
Loss = 2.7437e-01, PNorm = 39.2263, GNorm = 3.7430, lr_0 = 5.1287e-04
Loss = 2.6963e-01, PNorm = 39.2405, GNorm = 3.2333, lr_0 = 5.1663e-04
Loss = 2.5059e-01, PNorm = 39.2549, GNorm = 5.1394, lr_0 = 5.2038e-04
Loss = 2.7849e-01, PNorm = 39.2660, GNorm = 3.3000, lr_0 = 5.2413e-04
Loss = 2.7493e-01, PNorm = 39.2749, GNorm = 6.2444, lr_0 = 5.2788e-04
Loss = 3.0578e-01, PNorm = 39.2845, GNorm = 12.1263, lr_0 = 5.3162e-04
Loss = 2.9047e-01, PNorm = 39.3007, GNorm = 7.6575, lr_0 = 5.3538e-04
Loss = 2.8814e-01, PNorm = 39.3162, GNorm = 6.8730, lr_0 = 5.3912e-04
Loss = 2.9868e-01, PNorm = 39.3368, GNorm = 5.8628, lr_0 = 5.4288e-04
Loss = 2.9500e-01, PNorm = 39.3515, GNorm = 6.2646, lr_0 = 5.4663e-04
Loss = 2.7910e-01, PNorm = 39.3689, GNorm = 1.4933, lr_0 = 5.5038e-04
Validation mae = 0.583929
Epoch 1
Loss = 2.9565e-01, PNorm = 39.3851, GNorm = 7.2783, lr_0 = 5.5413e-04
Loss = 2.3504e-01, PNorm = 39.3967, GNorm = 3.4778, lr_0 = 5.5787e-04
Loss = 2.5114e-01, PNorm = 39.4063, GNorm = 3.3175, lr_0 = 5.6163e-04
Loss = 3.0633e-01, PNorm = 39.4171, GNorm = 1.2949, lr_0 = 5.6538e-04
Loss = 3.6221e-01, PNorm = 39.4356, GNorm = 8.9139, lr_0 = 5.6913e-04
Loss = 3.5370e-01, PNorm = 39.4543, GNorm = 4.4637, lr_0 = 5.7288e-04
Loss = 3.8505e-01, PNorm = 39.4767, GNorm = 5.1982, lr_0 = 5.7662e-04
Loss = 2.5438e-01, PNorm = 39.5024, GNorm = 0.9204, lr_0 = 5.8038e-04
Loss = 2.6797e-01, PNorm = 39.5192, GNorm = 1.8740, lr_0 = 5.8413e-04
Loss = 2.6531e-01, PNorm = 39.5348, GNorm = 2.1111, lr_0 = 5.8788e-04
Loss = 2.7331e-01, PNorm = 39.5479, GNorm = 2.9773, lr_0 = 5.9163e-04
Loss = 3.1217e-01, PNorm = 39.5687, GNorm = 2.4745, lr_0 = 5.9538e-04
Loss = 2.5299e-01, PNorm = 39.5866, GNorm = 1.3531, lr_0 = 5.9913e-04
Loss = 2.4485e-01, PNorm = 39.6028, GNorm = 2.3722, lr_0 = 6.0288e-04
Loss = 2.2797e-01, PNorm = 39.6142, GNorm = 3.7552, lr_0 = 6.0663e-04
Loss = 2.6281e-01, PNorm = 39.6241, GNorm = 1.7572, lr_0 = 6.1038e-04
Loss = 2.6963e-01, PNorm = 39.6370, GNorm = 0.7788, lr_0 = 6.1413e-04
Loss = 2.5155e-01, PNorm = 39.6528, GNorm = 2.4920, lr_0 = 6.1788e-04
Loss = 2.8008e-01, PNorm = 39.6642, GNorm = 2.7528, lr_0 = 6.2163e-04
Loss = 2.6793e-01, PNorm = 39.6780, GNorm = 6.3836, lr_0 = 6.2538e-04
Loss = 2.2495e-01, PNorm = 39.6951, GNorm = 2.3881, lr_0 = 6.2913e-04
Loss = 2.5265e-01, PNorm = 39.7155, GNorm = 1.2639, lr_0 = 6.3288e-04
Loss = 2.5843e-01, PNorm = 39.7333, GNorm = 9.1872, lr_0 = 6.3663e-04
Loss = 2.8051e-01, PNorm = 39.7485, GNorm = 13.1509, lr_0 = 6.4038e-04
Loss = 2.4600e-01, PNorm = 39.7652, GNorm = 5.2675, lr_0 = 6.4413e-04
Loss = 2.4289e-01, PNorm = 39.7832, GNorm = 3.5958, lr_0 = 6.4788e-04
Loss = 2.3266e-01, PNorm = 39.7979, GNorm = 4.1042, lr_0 = 6.5163e-04
Loss = 2.4799e-01, PNorm = 39.8118, GNorm = 6.9209, lr_0 = 6.5538e-04
Loss = 2.2184e-01, PNorm = 39.8261, GNorm = 1.8618, lr_0 = 6.5913e-04
Loss = 2.3437e-01, PNorm = 39.8428, GNorm = 7.1121, lr_0 = 6.6288e-04
Loss = 2.4304e-01, PNorm = 39.8591, GNorm = 9.0099, lr_0 = 6.6663e-04
Loss = 2.3673e-01, PNorm = 39.8763, GNorm = 2.8552, lr_0 = 6.7038e-04
Loss = 2.4017e-01, PNorm = 39.8968, GNorm = 2.3860, lr_0 = 6.7413e-04
Loss = 2.3715e-01, PNorm = 39.9164, GNorm = 2.9616, lr_0 = 6.7788e-04
Loss = 3.2180e-01, PNorm = 39.9353, GNorm = 1.8563, lr_0 = 6.8163e-04
Loss = 2.5090e-01, PNorm = 39.9572, GNorm = 4.0723, lr_0 = 6.8538e-04
Loss = 2.1696e-01, PNorm = 39.9779, GNorm = 3.9579, lr_0 = 6.8913e-04
Loss = 2.3795e-01, PNorm = 39.9916, GNorm = 7.6923, lr_0 = 6.9288e-04
Loss = 2.9912e-01, PNorm = 40.0041, GNorm = 1.4012, lr_0 = 6.9663e-04
Loss = 2.9757e-01, PNorm = 40.0178, GNorm = 3.2054, lr_0 = 7.0038e-04
Loss = 2.7995e-01, PNorm = 40.0382, GNorm = 6.3229, lr_0 = 7.0413e-04
Loss = 3.0943e-01, PNorm = 40.0608, GNorm = 12.3801, lr_0 = 7.0788e-04
Loss = 3.1885e-01, PNorm = 40.0878, GNorm = 6.0070, lr_0 = 7.1163e-04
Loss = 2.9608e-01, PNorm = 40.1120, GNorm = 2.0552, lr_0 = 7.1538e-04
Loss = 2.6046e-01, PNorm = 40.1336, GNorm = 5.1007, lr_0 = 7.1913e-04
Loss = 2.4213e-01, PNorm = 40.1521, GNorm = 2.5628, lr_0 = 7.2288e-04
Loss = 2.3041e-01, PNorm = 40.1683, GNorm = 3.6099, lr_0 = 7.2663e-04
Loss = 2.6533e-01, PNorm = 40.1832, GNorm = 9.7804, lr_0 = 7.3038e-04
Loss = 2.8262e-01, PNorm = 40.2012, GNorm = 1.7379, lr_0 = 7.3413e-04
Loss = 2.5424e-01, PNorm = 40.2228, GNorm = 0.6474, lr_0 = 7.3788e-04
Loss = 2.7976e-01, PNorm = 40.2448, GNorm = 1.1528, lr_0 = 7.4163e-04
Loss = 2.1246e-01, PNorm = 40.2610, GNorm = 2.1618, lr_0 = 7.4538e-04
Loss = 2.1107e-01, PNorm = 40.2733, GNorm = 2.6885, lr_0 = 7.4913e-04
Loss = 2.3682e-01, PNorm = 40.2912, GNorm = 0.9188, lr_0 = 7.5288e-04
Loss = 2.2874e-01, PNorm = 40.3102, GNorm = 1.2898, lr_0 = 7.5663e-04
Loss = 2.0873e-01, PNorm = 40.3261, GNorm = 0.9942, lr_0 = 7.6038e-04
Loss = 2.3805e-01, PNorm = 40.3517, GNorm = 4.4296, lr_0 = 7.6413e-04
Loss = 2.2473e-01, PNorm = 40.3683, GNorm = 2.1145, lr_0 = 7.6788e-04
Loss = 2.0001e-01, PNorm = 40.3900, GNorm = 5.7962, lr_0 = 7.7163e-04
Loss = 2.7981e-01, PNorm = 40.4076, GNorm = 3.4791, lr_0 = 7.7538e-04
Loss = 2.8973e-01, PNorm = 40.4368, GNorm = 1.9887, lr_0 = 7.7913e-04
Loss = 2.4491e-01, PNorm = 40.4681, GNorm = 4.9041, lr_0 = 7.8288e-04
Loss = 2.2447e-01, PNorm = 40.4942, GNorm = 8.4426, lr_0 = 7.8663e-04
Loss = 2.6401e-01, PNorm = 40.5156, GNorm = 1.4628, lr_0 = 7.9038e-04
Loss = 2.4068e-01, PNorm = 40.5417, GNorm = 2.0120, lr_0 = 7.9413e-04
Loss = 2.2095e-01, PNorm = 40.5624, GNorm = 7.9323, lr_0 = 7.9788e-04
Loss = 2.9113e-01, PNorm = 40.5772, GNorm = 5.4087, lr_0 = 8.0163e-04
Loss = 2.7091e-01, PNorm = 40.6002, GNorm = 1.4037, lr_0 = 8.0538e-04
Loss = 2.7631e-01, PNorm = 40.6331, GNorm = 0.9278, lr_0 = 8.0913e-04
Loss = 2.3871e-01, PNorm = 40.6704, GNorm = 4.1702, lr_0 = 8.1288e-04
Loss = 2.6199e-01, PNorm = 40.6903, GNorm = 3.5040, lr_0 = 8.1663e-04
Loss = 2.4654e-01, PNorm = 40.7133, GNorm = 2.7311, lr_0 = 8.2038e-04
Loss = 2.2887e-01, PNorm = 40.7364, GNorm = 2.0574, lr_0 = 8.2413e-04
Loss = 2.0408e-01, PNorm = 40.7538, GNorm = 2.3112, lr_0 = 8.2788e-04
Loss = 2.3815e-01, PNorm = 40.7740, GNorm = 13.5007, lr_0 = 8.3163e-04
Loss = 2.5674e-01, PNorm = 40.7911, GNorm = 2.1174, lr_0 = 8.3538e-04
Loss = 2.4095e-01, PNorm = 40.8186, GNorm = 2.0367, lr_0 = 8.3913e-04
Loss = 2.6090e-01, PNorm = 40.8388, GNorm = 1.9205, lr_0 = 8.4288e-04
Loss = 2.6353e-01, PNorm = 40.8689, GNorm = 5.5794, lr_0 = 8.4663e-04
Loss = 2.1788e-01, PNorm = 40.8954, GNorm = 3.9915, lr_0 = 8.5038e-04
Loss = 2.2415e-01, PNorm = 40.9254, GNorm = 5.1604, lr_0 = 8.5413e-04
Loss = 2.4011e-01, PNorm = 40.9437, GNorm = 0.7959, lr_0 = 8.5788e-04
Loss = 2.2051e-01, PNorm = 40.9630, GNorm = 2.0522, lr_0 = 8.6163e-04
Loss = 2.2988e-01, PNorm = 40.9867, GNorm = 1.3815, lr_0 = 8.6538e-04
Loss = 1.7841e-01, PNorm = 41.0135, GNorm = 1.9074, lr_0 = 8.6913e-04
Loss = 2.4094e-01, PNorm = 41.0385, GNorm = 7.8995, lr_0 = 8.7288e-04
Loss = 2.6726e-01, PNorm = 41.0605, GNorm = 6.3677, lr_0 = 8.7663e-04
Loss = 2.4862e-01, PNorm = 41.0926, GNorm = 3.8254, lr_0 = 8.8038e-04
Loss = 2.1155e-01, PNorm = 41.1138, GNorm = 3.2447, lr_0 = 8.8413e-04
Loss = 2.5844e-01, PNorm = 41.1265, GNorm = 1.4393, lr_0 = 8.8788e-04
Loss = 3.1069e-01, PNorm = 41.1603, GNorm = 0.9367, lr_0 = 8.9163e-04
Loss = 2.4470e-01, PNorm = 41.1868, GNorm = 1.1925, lr_0 = 8.9538e-04
Loss = 2.6484e-01, PNorm = 41.2159, GNorm = 1.0472, lr_0 = 8.9913e-04
Loss = 2.2700e-01, PNorm = 41.2427, GNorm = 4.0872, lr_0 = 9.0288e-04
Loss = 2.1525e-01, PNorm = 41.2717, GNorm = 1.3600, lr_0 = 9.0663e-04
Loss = 2.2793e-01, PNorm = 41.3017, GNorm = 1.1374, lr_0 = 9.1038e-04
Loss = 2.2767e-01, PNorm = 41.3236, GNorm = 6.3807, lr_0 = 9.1413e-04
Loss = 2.4567e-01, PNorm = 41.3350, GNorm = 2.0275, lr_0 = 9.1788e-04
Loss = 2.1655e-01, PNorm = 41.3546, GNorm = 2.9349, lr_0 = 9.2163e-04
Loss = 2.2180e-01, PNorm = 41.3824, GNorm = 3.7291, lr_0 = 9.2538e-04
Loss = 2.1122e-01, PNorm = 41.4066, GNorm = 1.5490, lr_0 = 9.2913e-04
Loss = 2.0889e-01, PNorm = 41.4287, GNorm = 2.9911, lr_0 = 9.3288e-04
Loss = 2.3231e-01, PNorm = 41.4553, GNorm = 2.6425, lr_0 = 9.3663e-04
Loss = 2.5138e-01, PNorm = 41.4872, GNorm = 3.7294, lr_0 = 9.4038e-04
Loss = 2.3859e-01, PNorm = 41.5224, GNorm = 3.8911, lr_0 = 9.4413e-04
Loss = 2.3454e-01, PNorm = 41.5502, GNorm = 1.3338, lr_0 = 9.4788e-04
Loss = 2.6247e-01, PNorm = 41.5766, GNorm = 3.6344, lr_0 = 9.5163e-04
Loss = 2.6029e-01, PNorm = 41.6134, GNorm = 1.8014, lr_0 = 9.5538e-04
Loss = 2.4611e-01, PNorm = 41.6425, GNorm = 0.9569, lr_0 = 9.5913e-04
Loss = 2.0280e-01, PNorm = 41.6711, GNorm = 2.5704, lr_0 = 9.6288e-04
Loss = 1.9641e-01, PNorm = 41.6889, GNorm = 1.0153, lr_0 = 9.6663e-04
Loss = 2.2084e-01, PNorm = 41.7102, GNorm = 1.4605, lr_0 = 9.7038e-04
Loss = 2.1201e-01, PNorm = 41.7337, GNorm = 1.5628, lr_0 = 9.7413e-04
Loss = 2.3849e-01, PNorm = 41.7554, GNorm = 6.5384, lr_0 = 9.7788e-04
Loss = 1.7751e-01, PNorm = 41.7814, GNorm = 3.6435, lr_0 = 9.8163e-04
Loss = 2.2159e-01, PNorm = 41.8028, GNorm = 1.2373, lr_0 = 9.8537e-04
Loss = 2.2931e-01, PNorm = 41.8263, GNorm = 2.2845, lr_0 = 9.8912e-04
Loss = 1.9952e-01, PNorm = 41.8572, GNorm = 1.2340, lr_0 = 9.9288e-04
Loss = 2.2229e-01, PNorm = 41.8895, GNorm = 2.4637, lr_0 = 9.9663e-04
Loss = 2.4138e-01, PNorm = 41.9143, GNorm = 6.2763, lr_0 = 9.9993e-04
Validation mae = 0.515572
Epoch 2
Loss = 2.2322e-01, PNorm = 41.9394, GNorm = 5.5089, lr_0 = 9.9925e-04
Loss = 2.0452e-01, PNorm = 41.9638, GNorm = 2.8058, lr_0 = 9.9856e-04
Loss = 2.0584e-01, PNorm = 41.9941, GNorm = 3.0700, lr_0 = 9.9788e-04
Loss = 2.1585e-01, PNorm = 42.0239, GNorm = 4.5991, lr_0 = 9.9719e-04
Loss = 2.1101e-01, PNorm = 42.0493, GNorm = 0.8239, lr_0 = 9.9651e-04
Loss = 2.0432e-01, PNorm = 42.0655, GNorm = 0.7940, lr_0 = 9.9583e-04
Loss = 2.5109e-01, PNorm = 42.0969, GNorm = 9.3459, lr_0 = 9.9515e-04
Loss = 2.2592e-01, PNorm = 42.1199, GNorm = 4.1530, lr_0 = 9.9446e-04
Loss = 2.2763e-01, PNorm = 42.1414, GNorm = 1.1865, lr_0 = 9.9378e-04
Loss = 1.9368e-01, PNorm = 42.1681, GNorm = 2.9327, lr_0 = 9.9310e-04
Loss = 1.9573e-01, PNorm = 42.1980, GNorm = 1.7407, lr_0 = 9.9242e-04
Loss = 1.8165e-01, PNorm = 42.2293, GNorm = 5.5227, lr_0 = 9.9174e-04
Loss = 2.0820e-01, PNorm = 42.2477, GNorm = 4.2129, lr_0 = 9.9106e-04
Loss = 1.8678e-01, PNorm = 42.2644, GNorm = 1.4448, lr_0 = 9.9038e-04
Loss = 1.8963e-01, PNorm = 42.2866, GNorm = 0.9558, lr_0 = 9.8971e-04
Loss = 2.0220e-01, PNorm = 42.3100, GNorm = 4.6531, lr_0 = 9.8903e-04
Loss = 2.0554e-01, PNorm = 42.3286, GNorm = 0.8213, lr_0 = 9.8835e-04
Loss = 2.1080e-01, PNorm = 42.3557, GNorm = 4.1268, lr_0 = 9.8767e-04
Loss = 1.9909e-01, PNorm = 42.3842, GNorm = 1.8466, lr_0 = 9.8700e-04
Loss = 2.2219e-01, PNorm = 42.4165, GNorm = 4.5593, lr_0 = 9.8632e-04
Loss = 1.8803e-01, PNorm = 42.4470, GNorm = 0.7596, lr_0 = 9.8564e-04
Loss = 1.7676e-01, PNorm = 42.4758, GNorm = 1.1554, lr_0 = 9.8497e-04
Loss = 2.0117e-01, PNorm = 42.4961, GNorm = 4.5413, lr_0 = 9.8429e-04
Loss = 2.1784e-01, PNorm = 42.5144, GNorm = 3.9511, lr_0 = 9.8362e-04
Loss = 1.8583e-01, PNorm = 42.5438, GNorm = 2.0710, lr_0 = 9.8295e-04
Loss = 2.1348e-01, PNorm = 42.5741, GNorm = 6.0130, lr_0 = 9.8227e-04
Loss = 2.0460e-01, PNorm = 42.5943, GNorm = 2.6168, lr_0 = 9.8160e-04
Loss = 2.0313e-01, PNorm = 42.6192, GNorm = 1.0823, lr_0 = 9.8093e-04
Loss = 1.9354e-01, PNorm = 42.6370, GNorm = 0.9006, lr_0 = 9.8026e-04
Loss = 2.1627e-01, PNorm = 42.6518, GNorm = 1.8713, lr_0 = 9.7958e-04
Loss = 2.0922e-01, PNorm = 42.6783, GNorm = 5.6412, lr_0 = 9.7891e-04
Loss = 1.6839e-01, PNorm = 42.6980, GNorm = 1.0454, lr_0 = 9.7824e-04
Loss = 2.6778e-01, PNorm = 42.7262, GNorm = 4.9560, lr_0 = 9.7757e-04
Loss = 2.0919e-01, PNorm = 42.7561, GNorm = 1.8181, lr_0 = 9.7690e-04
Loss = 2.2763e-01, PNorm = 42.7776, GNorm = 4.1560, lr_0 = 9.7623e-04
Loss = 1.9651e-01, PNorm = 42.8060, GNorm = 4.9258, lr_0 = 9.7556e-04
Loss = 2.2588e-01, PNorm = 42.8357, GNorm = 2.6196, lr_0 = 9.7490e-04
Loss = 1.8838e-01, PNorm = 42.8665, GNorm = 2.4534, lr_0 = 9.7423e-04
Loss = 1.9918e-01, PNorm = 42.8936, GNorm = 0.9342, lr_0 = 9.7356e-04
Loss = 2.0924e-01, PNorm = 42.9165, GNorm = 3.6453, lr_0 = 9.7289e-04
Loss = 2.0702e-01, PNorm = 42.9394, GNorm = 1.2109, lr_0 = 9.7223e-04
Loss = 1.6085e-01, PNorm = 42.9656, GNorm = 1.2661, lr_0 = 9.7156e-04
Loss = 1.8327e-01, PNorm = 42.9957, GNorm = 2.6055, lr_0 = 9.7090e-04
Loss = 1.8261e-01, PNorm = 43.0230, GNorm = 2.0029, lr_0 = 9.7023e-04
Loss = 1.9145e-01, PNorm = 43.0484, GNorm = 3.0471, lr_0 = 9.6957e-04
Loss = 2.0033e-01, PNorm = 43.0675, GNorm = 2.3002, lr_0 = 9.6890e-04
Loss = 2.2983e-01, PNorm = 43.0959, GNorm = 1.6690, lr_0 = 9.6824e-04
Loss = 1.8659e-01, PNorm = 43.1272, GNorm = 1.1248, lr_0 = 9.6757e-04
Loss = 1.7028e-01, PNorm = 43.1518, GNorm = 2.2608, lr_0 = 9.6691e-04
Loss = 2.4658e-01, PNorm = 43.1672, GNorm = 7.9959, lr_0 = 9.6625e-04
Loss = 2.1441e-01, PNorm = 43.1931, GNorm = 3.1547, lr_0 = 9.6559e-04
Loss = 2.1378e-01, PNorm = 43.2280, GNorm = 1.3375, lr_0 = 9.6493e-04
Loss = 2.5842e-01, PNorm = 43.2589, GNorm = 4.1904, lr_0 = 9.6427e-04
Loss = 1.9209e-01, PNorm = 43.2880, GNorm = 1.0497, lr_0 = 9.6360e-04
Loss = 1.8864e-01, PNorm = 43.3142, GNorm = 1.0039, lr_0 = 9.6294e-04
Loss = 2.0144e-01, PNorm = 43.3427, GNorm = 2.0271, lr_0 = 9.6228e-04
Loss = 1.9081e-01, PNorm = 43.3716, GNorm = 1.0991, lr_0 = 9.6163e-04
Loss = 1.8342e-01, PNorm = 43.3957, GNorm = 1.3084, lr_0 = 9.6097e-04
Loss = 1.7770e-01, PNorm = 43.4113, GNorm = 1.3327, lr_0 = 9.6031e-04
Loss = 1.7074e-01, PNorm = 43.4359, GNorm = 1.0522, lr_0 = 9.5965e-04
Loss = 1.8247e-01, PNorm = 43.4633, GNorm = 1.1618, lr_0 = 9.5899e-04
Loss = 2.2139e-01, PNorm = 43.4943, GNorm = 1.1911, lr_0 = 9.5834e-04
Loss = 2.0241e-01, PNorm = 43.5157, GNorm = 3.5229, lr_0 = 9.5768e-04
Loss = 2.0821e-01, PNorm = 43.5471, GNorm = 2.2659, lr_0 = 9.5702e-04
Loss = 2.1344e-01, PNorm = 43.5731, GNorm = 3.8362, lr_0 = 9.5637e-04
Loss = 1.9436e-01, PNorm = 43.6128, GNorm = 1.3730, lr_0 = 9.5571e-04
Loss = 1.4961e-01, PNorm = 43.6377, GNorm = 1.1892, lr_0 = 9.5506e-04
Loss = 1.9641e-01, PNorm = 43.6599, GNorm = 0.9205, lr_0 = 9.5440e-04
Loss = 1.7730e-01, PNorm = 43.6829, GNorm = 1.2684, lr_0 = 9.5375e-04
Loss = 1.9566e-01, PNorm = 43.6984, GNorm = 2.1084, lr_0 = 9.5310e-04
Loss = 1.9652e-01, PNorm = 43.7163, GNorm = 1.2189, lr_0 = 9.5244e-04
Loss = 1.9540e-01, PNorm = 43.7478, GNorm = 1.4335, lr_0 = 9.5179e-04
Loss = 1.6905e-01, PNorm = 43.7801, GNorm = 1.5245, lr_0 = 9.5114e-04
Loss = 2.0239e-01, PNorm = 43.7933, GNorm = 2.2536, lr_0 = 9.5049e-04
Loss = 2.0873e-01, PNorm = 43.8184, GNorm = 5.7250, lr_0 = 9.4984e-04
Loss = 2.4991e-01, PNorm = 43.8493, GNorm = 4.3540, lr_0 = 9.4919e-04
Loss = 1.9282e-01, PNorm = 43.8857, GNorm = 0.8331, lr_0 = 9.4854e-04
Loss = 1.9303e-01, PNorm = 43.9200, GNorm = 1.6404, lr_0 = 9.4789e-04
Loss = 1.9960e-01, PNorm = 43.9401, GNorm = 1.7166, lr_0 = 9.4724e-04
Loss = 1.8421e-01, PNorm = 43.9657, GNorm = 2.1949, lr_0 = 9.4659e-04
Loss = 1.5410e-01, PNorm = 43.9877, GNorm = 1.0611, lr_0 = 9.4594e-04
Loss = 1.7158e-01, PNorm = 44.0116, GNorm = 1.6496, lr_0 = 9.4529e-04
Loss = 1.9985e-01, PNorm = 44.0275, GNorm = 0.8168, lr_0 = 9.4464e-04
Loss = 1.7374e-01, PNorm = 44.0514, GNorm = 0.8120, lr_0 = 9.4400e-04
Loss = 1.6626e-01, PNorm = 44.0794, GNorm = 1.7735, lr_0 = 9.4335e-04
Loss = 2.2032e-01, PNorm = 44.0986, GNorm = 1.8953, lr_0 = 9.4270e-04
Loss = 1.7888e-01, PNorm = 44.1262, GNorm = 0.9760, lr_0 = 9.4206e-04
Loss = 1.5941e-01, PNorm = 44.1476, GNorm = 1.0430, lr_0 = 9.4141e-04
Loss = 1.9959e-01, PNorm = 44.1660, GNorm = 2.3780, lr_0 = 9.4077e-04
Loss = 1.9911e-01, PNorm = 44.1861, GNorm = 0.6952, lr_0 = 9.4012e-04
Loss = 2.0652e-01, PNorm = 44.2158, GNorm = 5.0983, lr_0 = 9.3948e-04
Loss = 1.9161e-01, PNorm = 44.2485, GNorm = 3.3187, lr_0 = 9.3884e-04
Loss = 1.8615e-01, PNorm = 44.2730, GNorm = 1.8419, lr_0 = 9.3819e-04
Loss = 1.8702e-01, PNorm = 44.3000, GNorm = 2.8328, lr_0 = 9.3755e-04
Loss = 2.0267e-01, PNorm = 44.3281, GNorm = 2.4439, lr_0 = 9.3691e-04
Loss = 1.8630e-01, PNorm = 44.3514, GNorm = 1.3969, lr_0 = 9.3627e-04
Loss = 1.9052e-01, PNorm = 44.3627, GNorm = 1.3293, lr_0 = 9.3562e-04
Loss = 1.7085e-01, PNorm = 44.3793, GNorm = 1.2840, lr_0 = 9.3498e-04
Loss = 1.9110e-01, PNorm = 44.3984, GNorm = 4.7675, lr_0 = 9.3434e-04
Loss = 1.7775e-01, PNorm = 44.4209, GNorm = 1.8099, lr_0 = 9.3370e-04
Loss = 1.9206e-01, PNorm = 44.4426, GNorm = 3.2936, lr_0 = 9.3306e-04
Loss = 1.9688e-01, PNorm = 44.4665, GNorm = 0.8185, lr_0 = 9.3242e-04
Loss = 2.0970e-01, PNorm = 44.4801, GNorm = 1.2982, lr_0 = 9.3178e-04
Loss = 2.0929e-01, PNorm = 44.4954, GNorm = 1.5335, lr_0 = 9.3115e-04
Loss = 1.8632e-01, PNorm = 44.5180, GNorm = 0.6541, lr_0 = 9.3051e-04
Loss = 1.8884e-01, PNorm = 44.5453, GNorm = 3.8393, lr_0 = 9.2987e-04
Loss = 2.2242e-01, PNorm = 44.5611, GNorm = 1.0695, lr_0 = 9.2923e-04
Loss = 1.8792e-01, PNorm = 44.5844, GNorm = 1.5815, lr_0 = 9.2860e-04
Loss = 1.8966e-01, PNorm = 44.6079, GNorm = 1.1823, lr_0 = 9.2796e-04
Loss = 1.7605e-01, PNorm = 44.6241, GNorm = 1.7985, lr_0 = 9.2733e-04
Loss = 1.6790e-01, PNorm = 44.6434, GNorm = 1.6828, lr_0 = 9.2669e-04
Loss = 1.5574e-01, PNorm = 44.6667, GNorm = 0.8880, lr_0 = 9.2606e-04
Loss = 1.8034e-01, PNorm = 44.6910, GNorm = 2.0063, lr_0 = 9.2542e-04
Loss = 1.7657e-01, PNorm = 44.7091, GNorm = 2.6118, lr_0 = 9.2479e-04
Loss = 2.0994e-01, PNorm = 44.7309, GNorm = 2.2401, lr_0 = 9.2415e-04
Loss = 1.9706e-01, PNorm = 44.7597, GNorm = 1.6725, lr_0 = 9.2352e-04
Loss = 1.7113e-01, PNorm = 44.7873, GNorm = 2.4792, lr_0 = 9.2289e-04
Loss = 1.7788e-01, PNorm = 44.8037, GNorm = 3.0593, lr_0 = 9.2226e-04
Loss = 1.6831e-01, PNorm = 44.8213, GNorm = 1.5179, lr_0 = 9.2162e-04
Loss = 1.7675e-01, PNorm = 44.8516, GNorm = 1.8023, lr_0 = 9.2099e-04
Validation mae = 0.518581
Epoch 3
Loss = 2.1627e-01, PNorm = 44.8788, GNorm = 2.1158, lr_0 = 9.2036e-04
Loss = 1.7874e-01, PNorm = 44.9029, GNorm = 1.1031, lr_0 = 9.1973e-04
Loss = 1.6792e-01, PNorm = 44.9314, GNorm = 2.3646, lr_0 = 9.1910e-04
Loss = 1.9184e-01, PNorm = 44.9514, GNorm = 2.2767, lr_0 = 9.1847e-04
Loss = 1.5768e-01, PNorm = 44.9705, GNorm = 2.1609, lr_0 = 9.1784e-04
Loss = 1.7270e-01, PNorm = 44.9932, GNorm = 0.7090, lr_0 = 9.1721e-04
Loss = 1.7830e-01, PNorm = 45.0160, GNorm = 1.1610, lr_0 = 9.1658e-04
Loss = 1.8055e-01, PNorm = 45.0424, GNorm = 1.2059, lr_0 = 9.1596e-04
Loss = 1.8670e-01, PNorm = 45.0739, GNorm = 3.0838, lr_0 = 9.1533e-04
Loss = 1.8782e-01, PNorm = 45.0916, GNorm = 1.3991, lr_0 = 9.1470e-04
Loss = 1.9328e-01, PNorm = 45.1176, GNorm = 1.1535, lr_0 = 9.1408e-04
Loss = 1.9494e-01, PNorm = 45.1433, GNorm = 2.8375, lr_0 = 9.1345e-04
Loss = 1.5380e-01, PNorm = 45.1623, GNorm = 1.4062, lr_0 = 9.1282e-04
Loss = 1.6025e-01, PNorm = 45.1881, GNorm = 2.9838, lr_0 = 9.1220e-04
Loss = 1.6614e-01, PNorm = 45.2159, GNorm = 1.1343, lr_0 = 9.1157e-04
Loss = 1.7253e-01, PNorm = 45.2416, GNorm = 1.6592, lr_0 = 9.1095e-04
Loss = 1.6077e-01, PNorm = 45.2678, GNorm = 2.1213, lr_0 = 9.1032e-04
Loss = 1.7700e-01, PNorm = 45.2937, GNorm = 2.0485, lr_0 = 9.0970e-04
Loss = 1.8354e-01, PNorm = 45.3087, GNorm = 1.1180, lr_0 = 9.0908e-04
Loss = 1.6498e-01, PNorm = 45.3259, GNorm = 0.7291, lr_0 = 9.0846e-04
Loss = 1.8345e-01, PNorm = 45.3453, GNorm = 0.6566, lr_0 = 9.0783e-04
Loss = 1.8522e-01, PNorm = 45.3686, GNorm = 1.7315, lr_0 = 9.0721e-04
Loss = 1.8344e-01, PNorm = 45.3883, GNorm = 2.8928, lr_0 = 9.0659e-04
Loss = 1.7523e-01, PNorm = 45.4113, GNorm = 1.4942, lr_0 = 9.0597e-04
Loss = 1.5620e-01, PNorm = 45.4406, GNorm = 1.1465, lr_0 = 9.0535e-04
Loss = 1.5411e-01, PNorm = 45.4552, GNorm = 0.9872, lr_0 = 9.0473e-04
Loss = 1.6700e-01, PNorm = 45.4776, GNorm = 1.1852, lr_0 = 9.0411e-04
Loss = 1.8564e-01, PNorm = 45.4988, GNorm = 2.3401, lr_0 = 9.0349e-04
Loss = 1.5771e-01, PNorm = 45.5159, GNorm = 0.6504, lr_0 = 9.0287e-04
Loss = 1.6353e-01, PNorm = 45.5328, GNorm = 1.0130, lr_0 = 9.0225e-04
Loss = 1.4484e-01, PNorm = 45.5556, GNorm = 0.6304, lr_0 = 9.0163e-04
Loss = 1.7034e-01, PNorm = 45.5766, GNorm = 2.3076, lr_0 = 9.0102e-04
Loss = 2.1190e-01, PNorm = 45.6024, GNorm = 1.7184, lr_0 = 9.0040e-04
Loss = 1.7550e-01, PNorm = 45.6231, GNorm = 1.4398, lr_0 = 8.9978e-04
Loss = 1.7146e-01, PNorm = 45.6370, GNorm = 1.3444, lr_0 = 8.9916e-04
Loss = 1.8784e-01, PNorm = 45.6632, GNorm = 2.7233, lr_0 = 8.9855e-04
Loss = 1.5508e-01, PNorm = 45.6881, GNorm = 1.3204, lr_0 = 8.9793e-04
Loss = 1.5518e-01, PNorm = 45.7027, GNorm = 3.5049, lr_0 = 8.9732e-04
Loss = 1.7401e-01, PNorm = 45.7168, GNorm = 1.0380, lr_0 = 8.9670e-04
Loss = 1.9438e-01, PNorm = 45.7421, GNorm = 1.4236, lr_0 = 8.9609e-04
Loss = 1.6774e-01, PNorm = 45.7711, GNorm = 1.1551, lr_0 = 8.9548e-04
Loss = 1.4999e-01, PNorm = 45.7945, GNorm = 0.8573, lr_0 = 8.9486e-04
Loss = 1.5860e-01, PNorm = 45.8128, GNorm = 2.4018, lr_0 = 8.9425e-04
Loss = 1.5339e-01, PNorm = 45.8253, GNorm = 1.1357, lr_0 = 8.9364e-04
Loss = 1.9056e-01, PNorm = 45.8361, GNorm = 1.0951, lr_0 = 8.9302e-04
Loss = 1.6407e-01, PNorm = 45.8551, GNorm = 0.7684, lr_0 = 8.9241e-04
Loss = 1.8056e-01, PNorm = 45.8746, GNorm = 3.8933, lr_0 = 8.9180e-04
Loss = 1.8284e-01, PNorm = 45.8927, GNorm = 3.1911, lr_0 = 8.9119e-04
Loss = 1.7975e-01, PNorm = 45.9162, GNorm = 3.9497, lr_0 = 8.9058e-04
Loss = 1.8994e-01, PNorm = 45.9505, GNorm = 4.2484, lr_0 = 8.8997e-04
Loss = 1.9188e-01, PNorm = 45.9805, GNorm = 3.8559, lr_0 = 8.8936e-04
Loss = 2.0096e-01, PNorm = 46.0168, GNorm = 1.7403, lr_0 = 8.8875e-04
Loss = 1.6926e-01, PNorm = 46.0427, GNorm = 2.4705, lr_0 = 8.8814e-04
Loss = 1.7533e-01, PNorm = 46.0679, GNorm = 3.3294, lr_0 = 8.8753e-04
Loss = 1.8180e-01, PNorm = 46.0893, GNorm = 1.3053, lr_0 = 8.8693e-04
Loss = 1.6995e-01, PNorm = 46.1006, GNorm = 2.4965, lr_0 = 8.8632e-04
Loss = 1.6030e-01, PNorm = 46.1265, GNorm = 1.2609, lr_0 = 8.8571e-04
Loss = 1.8084e-01, PNorm = 46.1404, GNorm = 0.9013, lr_0 = 8.8510e-04
Loss = 1.6733e-01, PNorm = 46.1628, GNorm = 0.5022, lr_0 = 8.8450e-04
Loss = 1.6450e-01, PNorm = 46.1895, GNorm = 1.3588, lr_0 = 8.8389e-04
Loss = 1.8606e-01, PNorm = 46.2126, GNorm = 1.6703, lr_0 = 8.8329e-04
Loss = 1.6409e-01, PNorm = 46.2421, GNorm = 0.8052, lr_0 = 8.8268e-04
Loss = 1.4820e-01, PNorm = 46.2710, GNorm = 1.9606, lr_0 = 8.8208e-04
Loss = 1.4051e-01, PNorm = 46.2937, GNorm = 1.3746, lr_0 = 8.8147e-04
Loss = 1.6718e-01, PNorm = 46.3016, GNorm = 1.4933, lr_0 = 8.8087e-04
Loss = 1.5842e-01, PNorm = 46.3186, GNorm = 1.1125, lr_0 = 8.8026e-04
Loss = 1.7045e-01, PNorm = 46.3380, GNorm = 1.7731, lr_0 = 8.7966e-04
Loss = 1.5763e-01, PNorm = 46.3533, GNorm = 2.7624, lr_0 = 8.7906e-04
Loss = 1.4248e-01, PNorm = 46.3727, GNorm = 1.0412, lr_0 = 8.7846e-04
Loss = 1.6276e-01, PNorm = 46.3865, GNorm = 1.2861, lr_0 = 8.7785e-04
Loss = 1.7363e-01, PNorm = 46.4069, GNorm = 2.0379, lr_0 = 8.7725e-04
Loss = 1.5437e-01, PNorm = 46.4301, GNorm = 0.9335, lr_0 = 8.7665e-04
Loss = 1.6369e-01, PNorm = 46.4531, GNorm = 1.3212, lr_0 = 8.7605e-04
Loss = 1.6710e-01, PNorm = 46.4732, GNorm = 1.1743, lr_0 = 8.7545e-04
Loss = 1.7797e-01, PNorm = 46.4933, GNorm = 3.0621, lr_0 = 8.7485e-04
Loss = 1.7948e-01, PNorm = 46.5133, GNorm = 4.2540, lr_0 = 8.7425e-04
Loss = 1.7013e-01, PNorm = 46.5363, GNorm = 1.2949, lr_0 = 8.7365e-04
Loss = 1.6838e-01, PNorm = 46.5565, GNorm = 1.7634, lr_0 = 8.7306e-04
Loss = 1.6449e-01, PNorm = 46.5827, GNorm = 2.5327, lr_0 = 8.7246e-04
Loss = 1.6294e-01, PNorm = 46.6027, GNorm = 1.3858, lr_0 = 8.7186e-04
Loss = 1.5555e-01, PNorm = 46.6151, GNorm = 1.0238, lr_0 = 8.7126e-04
Loss = 1.6285e-01, PNorm = 46.6337, GNorm = 1.7513, lr_0 = 8.7067e-04
Loss = 1.6020e-01, PNorm = 46.6491, GNorm = 0.8904, lr_0 = 8.7007e-04
Loss = 1.7888e-01, PNorm = 46.6769, GNorm = 3.3281, lr_0 = 8.6947e-04
Loss = 1.8968e-01, PNorm = 46.6946, GNorm = 1.5505, lr_0 = 8.6888e-04
Loss = 1.4672e-01, PNorm = 46.7215, GNorm = 0.8590, lr_0 = 8.6828e-04
Loss = 1.7058e-01, PNorm = 46.7483, GNorm = 1.0074, lr_0 = 8.6769e-04
Loss = 1.5364e-01, PNorm = 46.7564, GNorm = 0.6698, lr_0 = 8.6709e-04
Loss = 1.6572e-01, PNorm = 46.7789, GNorm = 1.9230, lr_0 = 8.6650e-04
Loss = 1.6452e-01, PNorm = 46.7970, GNorm = 1.0691, lr_0 = 8.6590e-04
Loss = 1.5706e-01, PNorm = 46.8128, GNorm = 0.7228, lr_0 = 8.6531e-04
Loss = 1.5226e-01, PNorm = 46.8333, GNorm = 2.0303, lr_0 = 8.6472e-04
Loss = 1.7246e-01, PNorm = 46.8559, GNorm = 2.2071, lr_0 = 8.6413e-04
Loss = 1.6333e-01, PNorm = 46.8812, GNorm = 1.2783, lr_0 = 8.6353e-04
Loss = 1.5188e-01, PNorm = 46.9065, GNorm = 0.8993, lr_0 = 8.6294e-04
Loss = 1.5036e-01, PNorm = 46.9347, GNorm = 0.9583, lr_0 = 8.6235e-04
Loss = 1.8168e-01, PNorm = 46.9491, GNorm = 1.5385, lr_0 = 8.6176e-04
Loss = 1.4445e-01, PNorm = 46.9610, GNorm = 2.2931, lr_0 = 8.6117e-04
Loss = 1.7795e-01, PNorm = 46.9763, GNorm = 1.0407, lr_0 = 8.6058e-04
Loss = 1.4014e-01, PNorm = 46.9926, GNorm = 0.9496, lr_0 = 8.5999e-04
Loss = 2.0652e-01, PNorm = 47.0144, GNorm = 1.1582, lr_0 = 8.5940e-04
Loss = 1.8707e-01, PNorm = 47.0400, GNorm = 2.2143, lr_0 = 8.5881e-04
Loss = 1.6985e-01, PNorm = 47.0654, GNorm = 2.1124, lr_0 = 8.5823e-04
Loss = 1.8031e-01, PNorm = 47.0876, GNorm = 2.2124, lr_0 = 8.5764e-04
Loss = 1.7381e-01, PNorm = 47.1064, GNorm = 3.5204, lr_0 = 8.5705e-04
Loss = 1.6301e-01, PNorm = 47.1243, GNorm = 1.1913, lr_0 = 8.5646e-04
Loss = 1.6430e-01, PNorm = 47.1487, GNorm = 1.6667, lr_0 = 8.5588e-04
Loss = 1.6872e-01, PNorm = 47.1679, GNorm = 2.0519, lr_0 = 8.5529e-04
Loss = 2.1341e-01, PNorm = 47.1904, GNorm = 2.8859, lr_0 = 8.5470e-04
Loss = 1.8251e-01, PNorm = 47.2133, GNorm = 0.7387, lr_0 = 8.5412e-04
Loss = 1.7260e-01, PNorm = 47.2378, GNorm = 0.7027, lr_0 = 8.5353e-04
Loss = 1.5525e-01, PNorm = 47.2597, GNorm = 1.2905, lr_0 = 8.5295e-04
Loss = 1.7121e-01, PNorm = 47.2772, GNorm = 0.7446, lr_0 = 8.5236e-04
Loss = 1.6750e-01, PNorm = 47.3041, GNorm = 1.5676, lr_0 = 8.5178e-04
Loss = 1.5130e-01, PNorm = 47.3292, GNorm = 1.3178, lr_0 = 8.5120e-04
Loss = 1.7538e-01, PNorm = 47.3443, GNorm = 1.0449, lr_0 = 8.5061e-04
Loss = 1.5691e-01, PNorm = 47.3601, GNorm = 0.7438, lr_0 = 8.5003e-04
Loss = 1.5199e-01, PNorm = 47.3839, GNorm = 1.6056, lr_0 = 8.4945e-04
Loss = 1.7598e-01, PNorm = 47.4074, GNorm = 1.9574, lr_0 = 8.4887e-04
Loss = 1.5769e-01, PNorm = 47.4234, GNorm = 1.7608, lr_0 = 8.4828e-04
Validation mae = 0.455859
Epoch 4
Loss = 1.4049e-01, PNorm = 47.4377, GNorm = 0.6598, lr_0 = 8.4770e-04
Loss = 1.5232e-01, PNorm = 47.4493, GNorm = 3.3438, lr_0 = 8.4712e-04
Loss = 1.4631e-01, PNorm = 47.4642, GNorm = 1.8544, lr_0 = 8.4654e-04
Loss = 1.7176e-01, PNorm = 47.4891, GNorm = 1.7487, lr_0 = 8.4596e-04
Loss = 1.6852e-01, PNorm = 47.5148, GNorm = 1.8277, lr_0 = 8.4538e-04
Loss = 1.4649e-01, PNorm = 47.5333, GNorm = 0.9839, lr_0 = 8.4480e-04
Loss = 1.2485e-01, PNorm = 47.5543, GNorm = 1.3373, lr_0 = 8.4423e-04
Loss = 1.2761e-01, PNorm = 47.5761, GNorm = 0.7973, lr_0 = 8.4365e-04
Loss = 1.5878e-01, PNorm = 47.5955, GNorm = 2.9107, lr_0 = 8.4307e-04
Loss = 1.6618e-01, PNorm = 47.6094, GNorm = 1.9098, lr_0 = 8.4249e-04
Loss = 1.5093e-01, PNorm = 47.6349, GNorm = 1.4174, lr_0 = 8.4191e-04
Loss = 1.3069e-01, PNorm = 47.6576, GNorm = 0.9356, lr_0 = 8.4134e-04
Loss = 1.3163e-01, PNorm = 47.6739, GNorm = 0.9961, lr_0 = 8.4076e-04
Loss = 1.5258e-01, PNorm = 47.6939, GNorm = 0.7964, lr_0 = 8.4019e-04
Loss = 1.5365e-01, PNorm = 47.7069, GNorm = 1.7691, lr_0 = 8.3961e-04
Loss = 1.6602e-01, PNorm = 47.7199, GNorm = 1.3101, lr_0 = 8.3903e-04
Loss = 1.7065e-01, PNorm = 47.7406, GNorm = 2.5056, lr_0 = 8.3846e-04
Loss = 1.5180e-01, PNorm = 47.7698, GNorm = 1.5700, lr_0 = 8.3789e-04
Loss = 1.5374e-01, PNorm = 47.7973, GNorm = 0.7689, lr_0 = 8.3731e-04
Loss = 1.6516e-01, PNorm = 47.8200, GNorm = 0.7632, lr_0 = 8.3674e-04
Loss = 1.6003e-01, PNorm = 47.8524, GNorm = 1.2912, lr_0 = 8.3616e-04
Loss = 1.6843e-01, PNorm = 47.8793, GNorm = 4.2441, lr_0 = 8.3559e-04
Loss = 1.5593e-01, PNorm = 47.9063, GNorm = 0.8795, lr_0 = 8.3502e-04
Loss = 1.5237e-01, PNorm = 47.9279, GNorm = 2.8033, lr_0 = 8.3445e-04
Loss = 1.7569e-01, PNorm = 47.9479, GNorm = 1.0461, lr_0 = 8.3388e-04
Loss = 1.8017e-01, PNorm = 47.9739, GNorm = 3.5402, lr_0 = 8.3330e-04
Loss = 1.6639e-01, PNorm = 48.0034, GNorm = 2.0718, lr_0 = 8.3273e-04
Loss = 1.6507e-01, PNorm = 48.0266, GNorm = 2.7549, lr_0 = 8.3216e-04
Loss = 1.5443e-01, PNorm = 48.0453, GNorm = 0.9800, lr_0 = 8.3159e-04
Loss = 1.5445e-01, PNorm = 48.0621, GNorm = 0.9671, lr_0 = 8.3102e-04
Loss = 1.4929e-01, PNorm = 48.0792, GNorm = 1.3922, lr_0 = 8.3045e-04
Loss = 1.4501e-01, PNorm = 48.0969, GNorm = 1.2277, lr_0 = 8.2988e-04
Loss = 1.5469e-01, PNorm = 48.1200, GNorm = 1.0884, lr_0 = 8.2932e-04
Loss = 1.5482e-01, PNorm = 48.1426, GNorm = 1.7696, lr_0 = 8.2875e-04
Loss = 1.3930e-01, PNorm = 48.1597, GNorm = 0.6250, lr_0 = 8.2818e-04
Loss = 1.6481e-01, PNorm = 48.1727, GNorm = 0.7003, lr_0 = 8.2761e-04
Loss = 1.4084e-01, PNorm = 48.1908, GNorm = 1.2263, lr_0 = 8.2705e-04
Loss = 1.6566e-01, PNorm = 48.2171, GNorm = 0.9563, lr_0 = 8.2648e-04
Loss = 1.7215e-01, PNorm = 48.2406, GNorm = 1.2078, lr_0 = 8.2591e-04
Loss = 1.6998e-01, PNorm = 48.2586, GNorm = 1.1824, lr_0 = 8.2535e-04
Loss = 1.6206e-01, PNorm = 48.2814, GNorm = 0.8760, lr_0 = 8.2478e-04
Loss = 1.7098e-01, PNorm = 48.3083, GNorm = 1.3731, lr_0 = 8.2422e-04
Loss = 1.5026e-01, PNorm = 48.3220, GNorm = 2.4547, lr_0 = 8.2365e-04
Loss = 1.4722e-01, PNorm = 48.3324, GNorm = 1.3022, lr_0 = 8.2309e-04
Loss = 1.5014e-01, PNorm = 48.3549, GNorm = 1.1302, lr_0 = 8.2252e-04
Loss = 1.4572e-01, PNorm = 48.3805, GNorm = 2.5714, lr_0 = 8.2196e-04
Loss = 1.5714e-01, PNorm = 48.4002, GNorm = 1.9276, lr_0 = 8.2140e-04
Loss = 1.4846e-01, PNorm = 48.4158, GNorm = 0.9065, lr_0 = 8.2084e-04
Loss = 1.4963e-01, PNorm = 48.4316, GNorm = 1.8899, lr_0 = 8.2027e-04
Loss = 1.3545e-01, PNorm = 48.4425, GNorm = 0.7508, lr_0 = 8.1971e-04
Loss = 1.4048e-01, PNorm = 48.4593, GNorm = 1.8340, lr_0 = 8.1915e-04
Loss = 1.5486e-01, PNorm = 48.4834, GNorm = 2.2535, lr_0 = 8.1859e-04
Loss = 1.7475e-01, PNorm = 48.5091, GNorm = 0.7256, lr_0 = 8.1803e-04
Loss = 1.6647e-01, PNorm = 48.5376, GNorm = 1.1471, lr_0 = 8.1747e-04
Loss = 1.4996e-01, PNorm = 48.5493, GNorm = 2.3301, lr_0 = 8.1691e-04
Loss = 1.5912e-01, PNorm = 48.5655, GNorm = 0.6645, lr_0 = 8.1635e-04
Loss = 1.4697e-01, PNorm = 48.5909, GNorm = 0.9766, lr_0 = 8.1579e-04
Loss = 1.6506e-01, PNorm = 48.6156, GNorm = 1.4325, lr_0 = 8.1523e-04
Loss = 1.3967e-01, PNorm = 48.6351, GNorm = 0.7860, lr_0 = 8.1467e-04
Loss = 1.5353e-01, PNorm = 48.6598, GNorm = 1.6738, lr_0 = 8.1411e-04
Loss = 1.2257e-01, PNorm = 48.6819, GNorm = 0.8374, lr_0 = 8.1355e-04
Loss = 1.4392e-01, PNorm = 48.6994, GNorm = 1.7102, lr_0 = 8.1300e-04
Loss = 1.7227e-01, PNorm = 48.7199, GNorm = 1.3607, lr_0 = 8.1244e-04
Loss = 1.3721e-01, PNorm = 48.7407, GNorm = 0.8556, lr_0 = 8.1188e-04
Loss = 1.3293e-01, PNorm = 48.7617, GNorm = 1.6480, lr_0 = 8.1133e-04
Loss = 1.3174e-01, PNorm = 48.7753, GNorm = 0.8569, lr_0 = 8.1077e-04
Loss = 1.3517e-01, PNorm = 48.7932, GNorm = 1.0986, lr_0 = 8.1022e-04
Loss = 1.6623e-01, PNorm = 48.8152, GNorm = 2.0086, lr_0 = 8.0966e-04
Loss = 1.9032e-01, PNorm = 48.8376, GNorm = 1.1257, lr_0 = 8.0911e-04
Loss = 1.6092e-01, PNorm = 48.8554, GNorm = 1.5636, lr_0 = 8.0855e-04
Loss = 1.9154e-01, PNorm = 48.8707, GNorm = 0.8813, lr_0 = 8.0800e-04
Loss = 1.5409e-01, PNorm = 48.8955, GNorm = 1.4177, lr_0 = 8.0745e-04
Loss = 1.7876e-01, PNorm = 48.9211, GNorm = 0.7335, lr_0 = 8.0689e-04
Loss = 1.8301e-01, PNorm = 48.9445, GNorm = 1.6053, lr_0 = 8.0634e-04
Loss = 1.8302e-01, PNorm = 48.9851, GNorm = 1.0180, lr_0 = 8.0579e-04
Loss = 1.5368e-01, PNorm = 49.0170, GNorm = 1.6286, lr_0 = 8.0523e-04
Loss = 1.3764e-01, PNorm = 49.0361, GNorm = 0.8953, lr_0 = 8.0468e-04
Loss = 1.1201e-01, PNorm = 49.0585, GNorm = 0.8228, lr_0 = 8.0413e-04
Loss = 1.5908e-01, PNorm = 49.0706, GNorm = 0.6668, lr_0 = 8.0358e-04
Loss = 1.5479e-01, PNorm = 49.0846, GNorm = 1.7093, lr_0 = 8.0303e-04
Loss = 1.4389e-01, PNorm = 49.1057, GNorm = 1.5374, lr_0 = 8.0248e-04
Loss = 1.4523e-01, PNorm = 49.1255, GNorm = 0.7404, lr_0 = 8.0193e-04
Loss = 1.3653e-01, PNorm = 49.1415, GNorm = 0.7862, lr_0 = 8.0138e-04
Loss = 1.3195e-01, PNorm = 49.1590, GNorm = 3.0067, lr_0 = 8.0083e-04
Loss = 1.4700e-01, PNorm = 49.1794, GNorm = 2.6453, lr_0 = 8.0028e-04
Loss = 1.3363e-01, PNorm = 49.2049, GNorm = 0.6916, lr_0 = 7.9974e-04
Loss = 1.3512e-01, PNorm = 49.2234, GNorm = 1.8051, lr_0 = 7.9919e-04
Loss = 1.4239e-01, PNorm = 49.2390, GNorm = 1.0387, lr_0 = 7.9864e-04
Loss = 1.3777e-01, PNorm = 49.2513, GNorm = 1.3132, lr_0 = 7.9809e-04
Loss = 1.5192e-01, PNorm = 49.2712, GNorm = 0.8508, lr_0 = 7.9755e-04
Loss = 1.4840e-01, PNorm = 49.2847, GNorm = 0.8314, lr_0 = 7.9700e-04
Loss = 1.4811e-01, PNorm = 49.2979, GNorm = 1.7367, lr_0 = 7.9645e-04
Loss = 1.6543e-01, PNorm = 49.3227, GNorm = 1.2149, lr_0 = 7.9591e-04
Loss = 1.4279e-01, PNorm = 49.3482, GNorm = 0.4901, lr_0 = 7.9536e-04
Loss = 1.5310e-01, PNorm = 49.3641, GNorm = 0.7658, lr_0 = 7.9482e-04
Loss = 2.0171e-01, PNorm = 49.3770, GNorm = 1.5504, lr_0 = 7.9427e-04
Loss = 1.4424e-01, PNorm = 49.4022, GNorm = 1.4848, lr_0 = 7.9373e-04
Loss = 1.2693e-01, PNorm = 49.4222, GNorm = 0.9431, lr_0 = 7.9319e-04
Loss = 1.2937e-01, PNorm = 49.4421, GNorm = 0.9529, lr_0 = 7.9264e-04
Loss = 1.2675e-01, PNorm = 49.4606, GNorm = 0.7986, lr_0 = 7.9210e-04
Loss = 1.4765e-01, PNorm = 49.4812, GNorm = 0.9698, lr_0 = 7.9156e-04
Loss = 1.5416e-01, PNorm = 49.5025, GNorm = 1.5244, lr_0 = 7.9101e-04
Loss = 1.2578e-01, PNorm = 49.5230, GNorm = 0.7410, lr_0 = 7.9047e-04
Loss = 1.4742e-01, PNorm = 49.5356, GNorm = 2.1294, lr_0 = 7.8993e-04
Loss = 1.5775e-01, PNorm = 49.5506, GNorm = 2.9077, lr_0 = 7.8939e-04
Loss = 1.5267e-01, PNorm = 49.5748, GNorm = 0.8945, lr_0 = 7.8885e-04
Loss = 1.3863e-01, PNorm = 49.5932, GNorm = 1.4117, lr_0 = 7.8831e-04
Loss = 1.8022e-01, PNorm = 49.6144, GNorm = 1.7402, lr_0 = 7.8777e-04
Loss = 1.5826e-01, PNorm = 49.6312, GNorm = 0.7584, lr_0 = 7.8723e-04
Loss = 1.5431e-01, PNorm = 49.6453, GNorm = 1.5469, lr_0 = 7.8669e-04
Loss = 1.3892e-01, PNorm = 49.6644, GNorm = 1.1473, lr_0 = 7.8615e-04
Loss = 1.6186e-01, PNorm = 49.6891, GNorm = 2.0398, lr_0 = 7.8561e-04
Loss = 1.7122e-01, PNorm = 49.7115, GNorm = 1.8795, lr_0 = 7.8507e-04
Loss = 1.8494e-01, PNorm = 49.7422, GNorm = 1.8618, lr_0 = 7.8454e-04
Loss = 1.2289e-01, PNorm = 49.7737, GNorm = 1.2165, lr_0 = 7.8400e-04
Loss = 1.6244e-01, PNorm = 49.7859, GNorm = 1.4099, lr_0 = 7.8346e-04
Loss = 1.7001e-01, PNorm = 49.8041, GNorm = 1.5411, lr_0 = 7.8293e-04
Loss = 1.4914e-01, PNorm = 49.8259, GNorm = 0.9079, lr_0 = 7.8239e-04
Loss = 1.9070e-01, PNorm = 49.8382, GNorm = 0.8888, lr_0 = 7.8185e-04
Loss = 1.4103e-01, PNorm = 49.8571, GNorm = 1.6266, lr_0 = 7.8132e-04
Validation mae = 0.469327
Epoch 5
Loss = 1.5230e-01, PNorm = 49.8753, GNorm = 1.2879, lr_0 = 7.8078e-04
Loss = 1.2009e-01, PNorm = 49.8938, GNorm = 1.5762, lr_0 = 7.8025e-04
Loss = 1.3524e-01, PNorm = 49.9169, GNorm = 1.4828, lr_0 = 7.7971e-04
Loss = 1.2449e-01, PNorm = 49.9356, GNorm = 1.8047, lr_0 = 7.7918e-04
Loss = 1.3819e-01, PNorm = 49.9526, GNorm = 1.0122, lr_0 = 7.7864e-04
Loss = 1.3589e-01, PNorm = 49.9715, GNorm = 0.6207, lr_0 = 7.7811e-04
Loss = 1.2724e-01, PNorm = 49.9860, GNorm = 1.0800, lr_0 = 7.7758e-04
Loss = 1.2467e-01, PNorm = 49.9968, GNorm = 0.7921, lr_0 = 7.7705e-04
Loss = 1.5943e-01, PNorm = 50.0129, GNorm = 1.8483, lr_0 = 7.7651e-04
Loss = 1.6312e-01, PNorm = 50.0416, GNorm = 0.5986, lr_0 = 7.7598e-04
Loss = 1.3984e-01, PNorm = 50.0731, GNorm = 1.2429, lr_0 = 7.7545e-04
Loss = 1.4863e-01, PNorm = 50.0975, GNorm = 1.2352, lr_0 = 7.7492e-04
Loss = 1.4899e-01, PNorm = 50.1199, GNorm = 1.0248, lr_0 = 7.7439e-04
Loss = 1.5745e-01, PNorm = 50.1397, GNorm = 0.8811, lr_0 = 7.7386e-04
Loss = 1.5299e-01, PNorm = 50.1618, GNorm = 0.6804, lr_0 = 7.7333e-04
Loss = 1.5112e-01, PNorm = 50.1778, GNorm = 1.8252, lr_0 = 7.7280e-04
Loss = 1.3252e-01, PNorm = 50.1896, GNorm = 1.4666, lr_0 = 7.7227e-04
Loss = 1.3772e-01, PNorm = 50.2099, GNorm = 0.5689, lr_0 = 7.7174e-04
Loss = 1.4246e-01, PNorm = 50.2318, GNorm = 0.9054, lr_0 = 7.7121e-04
Loss = 1.2071e-01, PNorm = 50.2486, GNorm = 0.8754, lr_0 = 7.7068e-04
Loss = 1.4309e-01, PNorm = 50.2616, GNorm = 1.3502, lr_0 = 7.7015e-04
Loss = 1.3551e-01, PNorm = 50.2772, GNorm = 2.2069, lr_0 = 7.6963e-04
Loss = 1.2021e-01, PNorm = 50.2896, GNorm = 0.7879, lr_0 = 7.6910e-04
Loss = 1.2817e-01, PNorm = 50.3061, GNorm = 0.5815, lr_0 = 7.6857e-04
Loss = 1.4296e-01, PNorm = 50.3240, GNorm = 0.6604, lr_0 = 7.6805e-04
Loss = 1.4991e-01, PNorm = 50.3380, GNorm = 0.8060, lr_0 = 7.6752e-04
Loss = 1.4401e-01, PNorm = 50.3565, GNorm = 2.1738, lr_0 = 7.6699e-04
Loss = 1.3976e-01, PNorm = 50.3724, GNorm = 0.5251, lr_0 = 7.6647e-04
Loss = 1.3167e-01, PNorm = 50.3936, GNorm = 1.6149, lr_0 = 7.6594e-04
Loss = 1.4904e-01, PNorm = 50.4042, GNorm = 1.4612, lr_0 = 7.6542e-04
Loss = 1.4659e-01, PNorm = 50.4210, GNorm = 1.4581, lr_0 = 7.6489e-04
Loss = 1.3747e-01, PNorm = 50.4433, GNorm = 0.7923, lr_0 = 7.6437e-04
Loss = 1.4599e-01, PNorm = 50.4648, GNorm = 1.8313, lr_0 = 7.6385e-04
Loss = 1.2481e-01, PNorm = 50.4824, GNorm = 1.1633, lr_0 = 7.6332e-04
Loss = 1.5541e-01, PNorm = 50.5037, GNorm = 1.2977, lr_0 = 7.6280e-04
Loss = 1.4859e-01, PNorm = 50.5281, GNorm = 1.9357, lr_0 = 7.6228e-04
Loss = 1.6948e-01, PNorm = 50.5563, GNorm = 0.9175, lr_0 = 7.6176e-04
Loss = 1.4213e-01, PNorm = 50.5801, GNorm = 0.6967, lr_0 = 7.6123e-04
Loss = 1.5035e-01, PNorm = 50.6011, GNorm = 1.9893, lr_0 = 7.6071e-04
Loss = 1.3627e-01, PNorm = 50.6304, GNorm = 1.1244, lr_0 = 7.6019e-04
Loss = 1.2497e-01, PNorm = 50.6528, GNorm = 0.4728, lr_0 = 7.5967e-04
Loss = 1.2911e-01, PNorm = 50.6616, GNorm = 1.5840, lr_0 = 7.5915e-04
Loss = 1.3253e-01, PNorm = 50.6751, GNorm = 1.0833, lr_0 = 7.5863e-04
Loss = 1.0876e-01, PNorm = 50.6940, GNorm = 0.5492, lr_0 = 7.5811e-04
Loss = 1.5142e-01, PNorm = 50.7139, GNorm = 2.0608, lr_0 = 7.5759e-04
Loss = 1.3985e-01, PNorm = 50.7374, GNorm = 1.6081, lr_0 = 7.5707e-04
Loss = 1.5579e-01, PNorm = 50.7590, GNorm = 0.8596, lr_0 = 7.5655e-04
Loss = 1.2422e-01, PNorm = 50.7804, GNorm = 1.6682, lr_0 = 7.5603e-04
Loss = 1.4103e-01, PNorm = 50.7979, GNorm = 0.8816, lr_0 = 7.5552e-04
Loss = 1.1829e-01, PNorm = 50.8062, GNorm = 1.3054, lr_0 = 7.5500e-04
Loss = 1.4382e-01, PNorm = 50.8184, GNorm = 1.8722, lr_0 = 7.5448e-04
Loss = 1.3792e-01, PNorm = 50.8410, GNorm = 0.9357, lr_0 = 7.5397e-04
Loss = 1.4382e-01, PNorm = 50.8715, GNorm = 1.0041, lr_0 = 7.5345e-04
Loss = 1.4172e-01, PNorm = 50.8930, GNorm = 1.6728, lr_0 = 7.5293e-04
Loss = 1.3247e-01, PNorm = 50.9084, GNorm = 0.6139, lr_0 = 7.5242e-04
Loss = 1.3211e-01, PNorm = 50.9274, GNorm = 0.8501, lr_0 = 7.5190e-04
Loss = 1.3506e-01, PNorm = 50.9424, GNorm = 1.1853, lr_0 = 7.5139e-04
Loss = 1.6222e-01, PNorm = 50.9506, GNorm = 1.8931, lr_0 = 7.5087e-04
Loss = 1.8165e-01, PNorm = 50.9671, GNorm = 4.2017, lr_0 = 7.5036e-04
Loss = 1.7430e-01, PNorm = 51.0020, GNorm = 0.4887, lr_0 = 7.4984e-04
Loss = 1.5293e-01, PNorm = 51.0321, GNorm = 1.0590, lr_0 = 7.4933e-04
Loss = 1.4916e-01, PNorm = 51.0504, GNorm = 0.8265, lr_0 = 7.4882e-04
Loss = 1.5693e-01, PNorm = 51.0775, GNorm = 1.0901, lr_0 = 7.4830e-04
Loss = 1.5746e-01, PNorm = 51.1017, GNorm = 1.5666, lr_0 = 7.4779e-04
Loss = 1.4730e-01, PNorm = 51.1163, GNorm = 1.3965, lr_0 = 7.4728e-04
Loss = 1.3228e-01, PNorm = 51.1258, GNorm = 2.2848, lr_0 = 7.4677e-04
Loss = 1.5918e-01, PNorm = 51.1433, GNorm = 1.8835, lr_0 = 7.4625e-04
Loss = 1.7308e-01, PNorm = 51.1658, GNorm = 1.4091, lr_0 = 7.4574e-04
Loss = 1.4160e-01, PNorm = 51.1892, GNorm = 0.8068, lr_0 = 7.4523e-04
Loss = 1.5764e-01, PNorm = 51.2021, GNorm = 1.1190, lr_0 = 7.4472e-04
Loss = 1.4443e-01, PNorm = 51.2241, GNorm = 0.8109, lr_0 = 7.4421e-04
Loss = 1.2479e-01, PNorm = 51.2358, GNorm = 1.6388, lr_0 = 7.4370e-04
Loss = 1.2574e-01, PNorm = 51.2460, GNorm = 0.9392, lr_0 = 7.4319e-04
Loss = 1.2380e-01, PNorm = 51.2583, GNorm = 1.0791, lr_0 = 7.4268e-04
Loss = 1.3210e-01, PNorm = 51.2781, GNorm = 0.6906, lr_0 = 7.4217e-04
Loss = 1.0797e-01, PNorm = 51.2901, GNorm = 1.0709, lr_0 = 7.4167e-04
Loss = 1.2497e-01, PNorm = 51.3036, GNorm = 1.1276, lr_0 = 7.4116e-04
Loss = 1.3785e-01, PNorm = 51.3191, GNorm = 1.8936, lr_0 = 7.4065e-04
Loss = 1.5819e-01, PNorm = 51.3356, GNorm = 1.3240, lr_0 = 7.4014e-04
Loss = 1.1496e-01, PNorm = 51.3512, GNorm = 1.8577, lr_0 = 7.3964e-04
Loss = 1.2557e-01, PNorm = 51.3718, GNorm = 0.8617, lr_0 = 7.3913e-04
Loss = 1.4588e-01, PNorm = 51.3802, GNorm = 0.9072, lr_0 = 7.3862e-04
Loss = 1.3393e-01, PNorm = 51.3949, GNorm = 1.0494, lr_0 = 7.3812e-04
Loss = 1.4771e-01, PNorm = 51.4108, GNorm = 0.7572, lr_0 = 7.3761e-04
Loss = 1.3225e-01, PNorm = 51.4214, GNorm = 0.7447, lr_0 = 7.3711e-04
Loss = 1.3771e-01, PNorm = 51.4320, GNorm = 0.6927, lr_0 = 7.3660e-04
Loss = 1.3193e-01, PNorm = 51.4407, GNorm = 0.4742, lr_0 = 7.3610e-04
Loss = 1.2883e-01, PNorm = 51.4558, GNorm = 0.7902, lr_0 = 7.3559e-04
Loss = 1.4282e-01, PNorm = 51.4700, GNorm = 0.6328, lr_0 = 7.3509e-04
Loss = 1.2259e-01, PNorm = 51.4848, GNorm = 1.1793, lr_0 = 7.3458e-04
Loss = 1.1194e-01, PNorm = 51.4981, GNorm = 1.5256, lr_0 = 7.3408e-04
Loss = 1.4939e-01, PNorm = 51.5150, GNorm = 1.2832, lr_0 = 7.3358e-04
Loss = 1.3816e-01, PNorm = 51.5340, GNorm = 2.9606, lr_0 = 7.3308e-04
Loss = 1.5638e-01, PNorm = 51.5523, GNorm = 0.7789, lr_0 = 7.3257e-04
Loss = 1.4547e-01, PNorm = 51.5714, GNorm = 1.2297, lr_0 = 7.3207e-04
Loss = 1.2335e-01, PNorm = 51.5951, GNorm = 1.0626, lr_0 = 7.3157e-04
Loss = 1.1866e-01, PNorm = 51.6185, GNorm = 0.6257, lr_0 = 7.3107e-04
Loss = 1.2847e-01, PNorm = 51.6305, GNorm = 1.7192, lr_0 = 7.3057e-04
Loss = 1.3406e-01, PNorm = 51.6392, GNorm = 0.5274, lr_0 = 7.3007e-04
Loss = 1.4166e-01, PNorm = 51.6483, GNorm = 0.6648, lr_0 = 7.2957e-04
Loss = 1.2802e-01, PNorm = 51.6628, GNorm = 0.7488, lr_0 = 7.2907e-04
Loss = 1.4257e-01, PNorm = 51.6810, GNorm = 0.9687, lr_0 = 7.2857e-04
Loss = 1.2604e-01, PNorm = 51.7030, GNorm = 0.7233, lr_0 = 7.2807e-04
Loss = 1.1681e-01, PNorm = 51.7229, GNorm = 0.5425, lr_0 = 7.2757e-04
Loss = 1.4683e-01, PNorm = 51.7444, GNorm = 0.9671, lr_0 = 7.2707e-04
Loss = 1.5394e-01, PNorm = 51.7660, GNorm = 0.7177, lr_0 = 7.2657e-04
Loss = 1.4827e-01, PNorm = 51.7831, GNorm = 0.9777, lr_0 = 7.2608e-04
Loss = 1.4615e-01, PNorm = 51.8008, GNorm = 3.0220, lr_0 = 7.2558e-04
Loss = 1.5331e-01, PNorm = 51.8221, GNorm = 1.3476, lr_0 = 7.2508e-04
Loss = 1.4199e-01, PNorm = 51.8445, GNorm = 1.3200, lr_0 = 7.2458e-04
Loss = 1.4270e-01, PNorm = 51.8733, GNorm = 1.2495, lr_0 = 7.2409e-04
Loss = 1.4163e-01, PNorm = 51.8940, GNorm = 0.8698, lr_0 = 7.2359e-04
Loss = 1.2507e-01, PNorm = 51.9101, GNorm = 2.1102, lr_0 = 7.2310e-04
Loss = 1.2807e-01, PNorm = 51.9267, GNorm = 1.2722, lr_0 = 7.2260e-04
Loss = 1.4312e-01, PNorm = 51.9401, GNorm = 0.5741, lr_0 = 7.2211e-04
Loss = 1.3082e-01, PNorm = 51.9555, GNorm = 1.0701, lr_0 = 7.2161e-04
Loss = 1.2708e-01, PNorm = 51.9624, GNorm = 0.8104, lr_0 = 7.2112e-04
Loss = 1.1910e-01, PNorm = 51.9700, GNorm = 0.6020, lr_0 = 7.2062e-04
Loss = 1.2872e-01, PNorm = 51.9813, GNorm = 1.7465, lr_0 = 7.2013e-04
Loss = 1.6439e-01, PNorm = 52.0014, GNorm = 1.1086, lr_0 = 7.1964e-04
Validation mae = 0.437915
Epoch 6
Loss = 1.4598e-01, PNorm = 52.0193, GNorm = 1.2840, lr_0 = 7.1914e-04
Loss = 1.2552e-01, PNorm = 52.0432, GNorm = 0.6428, lr_0 = 7.1865e-04
Loss = 1.6518e-01, PNorm = 52.0652, GNorm = 1.9885, lr_0 = 7.1816e-04
Loss = 1.3113e-01, PNorm = 52.0868, GNorm = 1.7815, lr_0 = 7.1767e-04
Loss = 1.4182e-01, PNorm = 52.1096, GNorm = 1.3955, lr_0 = 7.1717e-04
Loss = 1.2592e-01, PNorm = 52.1324, GNorm = 0.8005, lr_0 = 7.1668e-04
Loss = 1.2947e-01, PNorm = 52.1425, GNorm = 0.6503, lr_0 = 7.1619e-04
Loss = 1.3834e-01, PNorm = 52.1589, GNorm = 1.4001, lr_0 = 7.1570e-04
Loss = 1.3640e-01, PNorm = 52.1773, GNorm = 1.6575, lr_0 = 7.1521e-04
Loss = 1.1663e-01, PNorm = 52.1952, GNorm = 1.9828, lr_0 = 7.1472e-04
Loss = 1.3673e-01, PNorm = 52.2137, GNorm = 0.6771, lr_0 = 7.1423e-04
Loss = 1.4516e-01, PNorm = 52.2368, GNorm = 0.8200, lr_0 = 7.1374e-04
Loss = 1.3422e-01, PNorm = 52.2587, GNorm = 0.9541, lr_0 = 7.1325e-04
Loss = 1.1376e-01, PNorm = 52.2791, GNorm = 0.5720, lr_0 = 7.1277e-04
Loss = 1.1816e-01, PNorm = 52.2974, GNorm = 0.9729, lr_0 = 7.1228e-04
Loss = 1.1829e-01, PNorm = 52.3198, GNorm = 0.4671, lr_0 = 7.1179e-04
Loss = 1.4136e-01, PNorm = 52.3364, GNorm = 0.9275, lr_0 = 7.1130e-04
Loss = 1.2682e-01, PNorm = 52.3575, GNorm = 0.8436, lr_0 = 7.1081e-04
Loss = 1.2851e-01, PNorm = 52.3725, GNorm = 1.5994, lr_0 = 7.1033e-04
Loss = 1.3065e-01, PNorm = 52.3846, GNorm = 0.6962, lr_0 = 7.0984e-04
Loss = 1.3082e-01, PNorm = 52.4031, GNorm = 1.0941, lr_0 = 7.0935e-04
Loss = 1.1979e-01, PNorm = 52.4158, GNorm = 1.0902, lr_0 = 7.0887e-04
Loss = 1.1719e-01, PNorm = 52.4321, GNorm = 0.9794, lr_0 = 7.0838e-04
Loss = 1.0872e-01, PNorm = 52.4510, GNorm = 0.6370, lr_0 = 7.0790e-04
Loss = 1.2253e-01, PNorm = 52.4703, GNorm = 2.1258, lr_0 = 7.0741e-04
Loss = 1.3518e-01, PNorm = 52.4865, GNorm = 0.8214, lr_0 = 7.0693e-04
Loss = 1.2212e-01, PNorm = 52.5077, GNorm = 0.8206, lr_0 = 7.0644e-04
Loss = 1.3098e-01, PNorm = 52.5223, GNorm = 0.5921, lr_0 = 7.0596e-04
Loss = 1.1986e-01, PNorm = 52.5382, GNorm = 1.0977, lr_0 = 7.0548e-04
Loss = 1.4150e-01, PNorm = 52.5531, GNorm = 0.7508, lr_0 = 7.0499e-04
Loss = 1.1861e-01, PNorm = 52.5726, GNorm = 1.0413, lr_0 = 7.0451e-04
Loss = 1.1390e-01, PNorm = 52.5982, GNorm = 0.6290, lr_0 = 7.0403e-04
Loss = 1.2458e-01, PNorm = 52.6139, GNorm = 0.8002, lr_0 = 7.0354e-04
Loss = 1.2125e-01, PNorm = 52.6227, GNorm = 1.0792, lr_0 = 7.0306e-04
Loss = 1.4688e-01, PNorm = 52.6432, GNorm = 1.0215, lr_0 = 7.0258e-04
Loss = 1.2388e-01, PNorm = 52.6582, GNorm = 0.8534, lr_0 = 7.0210e-04
Loss = 1.2198e-01, PNorm = 52.6731, GNorm = 1.0923, lr_0 = 7.0162e-04
Loss = 1.0818e-01, PNorm = 52.6933, GNorm = 0.9699, lr_0 = 7.0114e-04
Loss = 1.2522e-01, PNorm = 52.7126, GNorm = 0.6132, lr_0 = 7.0066e-04
Loss = 1.3161e-01, PNorm = 52.7286, GNorm = 1.2349, lr_0 = 7.0018e-04
Loss = 1.0988e-01, PNorm = 52.7491, GNorm = 0.6656, lr_0 = 6.9970e-04
Loss = 1.3417e-01, PNorm = 52.7618, GNorm = 0.5454, lr_0 = 6.9922e-04
Loss = 1.2777e-01, PNorm = 52.7737, GNorm = 0.5424, lr_0 = 6.9874e-04
Loss = 1.2725e-01, PNorm = 52.7916, GNorm = 0.9649, lr_0 = 6.9826e-04
Loss = 1.4050e-01, PNorm = 52.8112, GNorm = 0.8062, lr_0 = 6.9778e-04
Loss = 1.1491e-01, PNorm = 52.8313, GNorm = 0.6837, lr_0 = 6.9730e-04
Loss = 1.3241e-01, PNorm = 52.8466, GNorm = 1.2610, lr_0 = 6.9683e-04
Loss = 1.2299e-01, PNorm = 52.8570, GNorm = 1.2781, lr_0 = 6.9635e-04
Loss = 1.0815e-01, PNorm = 52.8681, GNorm = 0.7597, lr_0 = 6.9587e-04
Loss = 1.3109e-01, PNorm = 52.8847, GNorm = 0.8670, lr_0 = 6.9540e-04
Loss = 1.4524e-01, PNorm = 52.9053, GNorm = 1.2628, lr_0 = 6.9492e-04
Loss = 1.2437e-01, PNorm = 52.9245, GNorm = 0.6561, lr_0 = 6.9444e-04
Loss = 1.1648e-01, PNorm = 52.9376, GNorm = 1.0672, lr_0 = 6.9397e-04
Loss = 1.1426e-01, PNorm = 52.9581, GNorm = 0.9500, lr_0 = 6.9349e-04
Loss = 1.2814e-01, PNorm = 52.9772, GNorm = 0.5785, lr_0 = 6.9302e-04
Loss = 1.3563e-01, PNorm = 52.9972, GNorm = 1.0710, lr_0 = 6.9254e-04
Loss = 1.1805e-01, PNorm = 53.0101, GNorm = 0.7047, lr_0 = 6.9207e-04
Loss = 1.4326e-01, PNorm = 53.0252, GNorm = 0.8031, lr_0 = 6.9159e-04
Loss = 1.5944e-01, PNorm = 53.0426, GNorm = 1.3413, lr_0 = 6.9112e-04
Loss = 1.4535e-01, PNorm = 53.0621, GNorm = 0.9042, lr_0 = 6.9065e-04
Loss = 1.3894e-01, PNorm = 53.0823, GNorm = 1.3937, lr_0 = 6.9017e-04
Loss = 1.3403e-01, PNorm = 53.1056, GNorm = 0.6596, lr_0 = 6.8970e-04
Loss = 1.4852e-01, PNorm = 53.1334, GNorm = 0.8448, lr_0 = 6.8923e-04
Loss = 1.3881e-01, PNorm = 53.1597, GNorm = 1.1660, lr_0 = 6.8876e-04
Loss = 1.2969e-01, PNorm = 53.1819, GNorm = 0.7459, lr_0 = 6.8828e-04
Loss = 1.2237e-01, PNorm = 53.2012, GNorm = 0.6225, lr_0 = 6.8781e-04
Loss = 1.3168e-01, PNorm = 53.2194, GNorm = 1.0879, lr_0 = 6.8734e-04
Loss = 1.3091e-01, PNorm = 53.2341, GNorm = 0.7356, lr_0 = 6.8687e-04
Loss = 1.2652e-01, PNorm = 53.2451, GNorm = 0.9644, lr_0 = 6.8640e-04
Loss = 1.2564e-01, PNorm = 53.2511, GNorm = 0.5709, lr_0 = 6.8593e-04
Loss = 1.3338e-01, PNorm = 53.2632, GNorm = 0.7136, lr_0 = 6.8546e-04
Loss = 1.2226e-01, PNorm = 53.2824, GNorm = 1.8794, lr_0 = 6.8499e-04
Loss = 1.2793e-01, PNorm = 53.3033, GNorm = 0.7567, lr_0 = 6.8452e-04
Loss = 1.3280e-01, PNorm = 53.3214, GNorm = 1.2206, lr_0 = 6.8405e-04
Loss = 1.3085e-01, PNorm = 53.3348, GNorm = 1.3235, lr_0 = 6.8358e-04
Loss = 1.2759e-01, PNorm = 53.3508, GNorm = 0.6522, lr_0 = 6.8312e-04
Loss = 1.1287e-01, PNorm = 53.3631, GNorm = 0.6530, lr_0 = 6.8265e-04
Loss = 1.1232e-01, PNorm = 53.3765, GNorm = 1.0434, lr_0 = 6.8218e-04
Loss = 1.3233e-01, PNorm = 53.3871, GNorm = 0.8352, lr_0 = 6.8171e-04
Loss = 1.2221e-01, PNorm = 53.4052, GNorm = 0.7688, lr_0 = 6.8125e-04
Loss = 1.4032e-01, PNorm = 53.4228, GNorm = 1.0468, lr_0 = 6.8078e-04
Loss = 1.3498e-01, PNorm = 53.4388, GNorm = 1.2437, lr_0 = 6.8031e-04
Loss = 1.3257e-01, PNorm = 53.4526, GNorm = 1.7334, lr_0 = 6.7985e-04
Loss = 1.1741e-01, PNorm = 53.4649, GNorm = 1.6558, lr_0 = 6.7938e-04
Loss = 1.0815e-01, PNorm = 53.4774, GNorm = 0.5552, lr_0 = 6.7892e-04
Loss = 1.2173e-01, PNorm = 53.4940, GNorm = 0.5433, lr_0 = 6.7845e-04
Loss = 1.2291e-01, PNorm = 53.5079, GNorm = 0.6928, lr_0 = 6.7799e-04
Loss = 1.4183e-01, PNorm = 53.5186, GNorm = 1.0085, lr_0 = 6.7752e-04
Loss = 1.3407e-01, PNorm = 53.5408, GNorm = 0.7323, lr_0 = 6.7706e-04
Loss = 1.3170e-01, PNorm = 53.5659, GNorm = 0.8274, lr_0 = 6.7659e-04
Loss = 1.1782e-01, PNorm = 53.5793, GNorm = 0.8902, lr_0 = 6.7613e-04
Loss = 1.4443e-01, PNorm = 53.5912, GNorm = 2.2010, lr_0 = 6.7567e-04
Loss = 1.4309e-01, PNorm = 53.6061, GNorm = 1.2084, lr_0 = 6.7520e-04
Loss = 1.4294e-01, PNorm = 53.6201, GNorm = 2.0816, lr_0 = 6.7474e-04
Loss = 1.2460e-01, PNorm = 53.6316, GNorm = 0.9396, lr_0 = 6.7428e-04
Loss = 1.1389e-01, PNorm = 53.6456, GNorm = 1.3284, lr_0 = 6.7382e-04
Loss = 1.2379e-01, PNorm = 53.6643, GNorm = 0.7715, lr_0 = 6.7335e-04
Loss = 1.0660e-01, PNorm = 53.6833, GNorm = 1.6395, lr_0 = 6.7289e-04
Loss = 1.1693e-01, PNorm = 53.6967, GNorm = 0.7975, lr_0 = 6.7243e-04
Loss = 1.2366e-01, PNorm = 53.7134, GNorm = 0.6812, lr_0 = 6.7197e-04
Loss = 1.2885e-01, PNorm = 53.7284, GNorm = 0.7411, lr_0 = 6.7151e-04
Loss = 1.2153e-01, PNorm = 53.7390, GNorm = 2.2387, lr_0 = 6.7105e-04
Loss = 1.1176e-01, PNorm = 53.7497, GNorm = 1.3400, lr_0 = 6.7059e-04
Loss = 1.1055e-01, PNorm = 53.7666, GNorm = 0.7865, lr_0 = 6.7013e-04
Loss = 1.1929e-01, PNorm = 53.7871, GNorm = 0.6683, lr_0 = 6.6967e-04
Loss = 1.2767e-01, PNorm = 53.8033, GNorm = 1.4489, lr_0 = 6.6921e-04
Loss = 1.2984e-01, PNorm = 53.8145, GNorm = 1.1221, lr_0 = 6.6876e-04
Loss = 1.3439e-01, PNorm = 53.8319, GNorm = 1.0741, lr_0 = 6.6830e-04
Loss = 2.0492e-01, PNorm = 53.8492, GNorm = 2.8520, lr_0 = 6.6784e-04
Loss = 1.6881e-01, PNorm = 53.8827, GNorm = 1.0576, lr_0 = 6.6738e-04
Loss = 1.6106e-01, PNorm = 53.9164, GNorm = 1.0652, lr_0 = 6.6693e-04
Loss = 1.3296e-01, PNorm = 53.9425, GNorm = 0.8949, lr_0 = 6.6647e-04
Loss = 1.4547e-01, PNorm = 53.9607, GNorm = 0.8278, lr_0 = 6.6601e-04
Loss = 1.2644e-01, PNorm = 53.9777, GNorm = 1.8339, lr_0 = 6.6556e-04
Loss = 1.4244e-01, PNorm = 53.9961, GNorm = 0.8511, lr_0 = 6.6510e-04
Loss = 1.4039e-01, PNorm = 54.0132, GNorm = 1.0227, lr_0 = 6.6464e-04
Loss = 1.5017e-01, PNorm = 54.0338, GNorm = 0.6522, lr_0 = 6.6419e-04
Loss = 1.3316e-01, PNorm = 54.0537, GNorm = 0.8440, lr_0 = 6.6373e-04
Loss = 1.2933e-01, PNorm = 54.0771, GNorm = 0.8564, lr_0 = 6.6328e-04
Loss = 1.2255e-01, PNorm = 54.0896, GNorm = 0.8723, lr_0 = 6.6282e-04
Validation mae = 0.435278
Epoch 7
Loss = 1.2675e-01, PNorm = 54.1030, GNorm = 0.6318, lr_0 = 6.6237e-04
Loss = 1.0423e-01, PNorm = 54.1149, GNorm = 0.5749, lr_0 = 6.6192e-04
Loss = 1.0357e-01, PNorm = 54.1295, GNorm = 0.9880, lr_0 = 6.6146e-04
Loss = 9.7980e-02, PNorm = 54.1412, GNorm = 0.6727, lr_0 = 6.6101e-04
Loss = 1.3274e-01, PNorm = 54.1510, GNorm = 1.2419, lr_0 = 6.6056e-04
Loss = 1.1062e-01, PNorm = 54.1675, GNorm = 1.0337, lr_0 = 6.6011e-04
Loss = 1.1293e-01, PNorm = 54.1784, GNorm = 0.6689, lr_0 = 6.5965e-04
Loss = 1.4366e-01, PNorm = 54.1902, GNorm = 1.6312, lr_0 = 6.5920e-04
Loss = 1.0709e-01, PNorm = 54.2091, GNorm = 0.4618, lr_0 = 6.5875e-04
Loss = 1.2714e-01, PNorm = 54.2265, GNorm = 0.8326, lr_0 = 6.5830e-04
Loss = 1.4189e-01, PNorm = 54.2426, GNorm = 0.9902, lr_0 = 6.5785e-04
Loss = 1.1371e-01, PNorm = 54.2617, GNorm = 0.7282, lr_0 = 6.5740e-04
Loss = 1.3198e-01, PNorm = 54.2797, GNorm = 0.8753, lr_0 = 6.5695e-04
Loss = 1.1878e-01, PNorm = 54.3022, GNorm = 0.6841, lr_0 = 6.5650e-04
Loss = 1.4220e-01, PNorm = 54.3172, GNorm = 0.7572, lr_0 = 6.5605e-04
Loss = 1.1614e-01, PNorm = 54.3349, GNorm = 0.7260, lr_0 = 6.5560e-04
Loss = 1.1799e-01, PNorm = 54.3536, GNorm = 0.6315, lr_0 = 6.5515e-04
Loss = 1.2554e-01, PNorm = 54.3709, GNorm = 1.2663, lr_0 = 6.5470e-04
Loss = 1.2191e-01, PNorm = 54.3873, GNorm = 0.6478, lr_0 = 6.5425e-04
Loss = 1.2593e-01, PNorm = 54.4040, GNorm = 0.6095, lr_0 = 6.5380e-04
Loss = 1.0963e-01, PNorm = 54.4169, GNorm = 1.7433, lr_0 = 6.5335e-04
Loss = 1.2426e-01, PNorm = 54.4290, GNorm = 1.5651, lr_0 = 6.5291e-04
Loss = 1.1682e-01, PNorm = 54.4488, GNorm = 0.7089, lr_0 = 6.5246e-04
Loss = 1.2223e-01, PNorm = 54.4728, GNorm = 0.5489, lr_0 = 6.5201e-04
Loss = 1.2174e-01, PNorm = 54.4906, GNorm = 1.1973, lr_0 = 6.5157e-04
Loss = 1.3654e-01, PNorm = 54.5052, GNorm = 0.7361, lr_0 = 6.5112e-04
Loss = 1.1693e-01, PNorm = 54.5186, GNorm = 0.7319, lr_0 = 6.5067e-04
Loss = 1.1188e-01, PNorm = 54.5312, GNorm = 0.5910, lr_0 = 6.5023e-04
Loss = 1.0674e-01, PNorm = 54.5468, GNorm = 0.6251, lr_0 = 6.4978e-04
Loss = 1.0600e-01, PNorm = 54.5600, GNorm = 1.0285, lr_0 = 6.4934e-04
Loss = 1.2116e-01, PNorm = 54.5788, GNorm = 1.7533, lr_0 = 6.4889e-04
Loss = 1.2523e-01, PNorm = 54.5964, GNorm = 1.2860, lr_0 = 6.4845e-04
Loss = 1.1179e-01, PNorm = 54.6097, GNorm = 0.6442, lr_0 = 6.4800e-04
Loss = 1.2887e-01, PNorm = 54.6248, GNorm = 0.8055, lr_0 = 6.4756e-04
Loss = 1.0306e-01, PNorm = 54.6407, GNorm = 0.9511, lr_0 = 6.4712e-04
Loss = 1.4373e-01, PNorm = 54.6555, GNorm = 1.9513, lr_0 = 6.4667e-04
Loss = 1.4921e-01, PNorm = 54.6726, GNorm = 1.6794, lr_0 = 6.4623e-04
Loss = 1.0717e-01, PNorm = 54.6912, GNorm = 1.6984, lr_0 = 6.4579e-04
Loss = 1.2626e-01, PNorm = 54.7122, GNorm = 1.0323, lr_0 = 6.4534e-04
Loss = 1.2543e-01, PNorm = 54.7278, GNorm = 1.9688, lr_0 = 6.4490e-04
Loss = 1.2272e-01, PNorm = 54.7380, GNorm = 1.8264, lr_0 = 6.4446e-04
Loss = 1.1873e-01, PNorm = 54.7541, GNorm = 0.9030, lr_0 = 6.4402e-04
Loss = 1.1212e-01, PNorm = 54.7710, GNorm = 1.3539, lr_0 = 6.4358e-04
Loss = 1.1204e-01, PNorm = 54.7846, GNorm = 1.3256, lr_0 = 6.4314e-04
Loss = 1.2055e-01, PNorm = 54.7955, GNorm = 0.6616, lr_0 = 6.4270e-04
Loss = 1.0572e-01, PNorm = 54.8050, GNorm = 0.4776, lr_0 = 6.4226e-04
Loss = 1.0433e-01, PNorm = 54.8135, GNorm = 0.7778, lr_0 = 6.4182e-04
Loss = 1.3551e-01, PNorm = 54.8248, GNorm = 0.8285, lr_0 = 6.4138e-04
Loss = 1.1353e-01, PNorm = 54.8415, GNorm = 0.5664, lr_0 = 6.4094e-04
Loss = 1.2412e-01, PNorm = 54.8578, GNorm = 0.6451, lr_0 = 6.4050e-04
Loss = 1.0207e-01, PNorm = 54.8751, GNorm = 0.7586, lr_0 = 6.4006e-04
Loss = 1.0030e-01, PNorm = 54.8833, GNorm = 0.5969, lr_0 = 6.3962e-04
Loss = 1.1868e-01, PNorm = 54.8993, GNorm = 1.2084, lr_0 = 6.3918e-04
Loss = 1.4637e-01, PNorm = 54.9213, GNorm = 0.7448, lr_0 = 6.3874e-04
Loss = 1.1629e-01, PNorm = 54.9374, GNorm = 0.8735, lr_0 = 6.3831e-04
Loss = 1.0668e-01, PNorm = 54.9493, GNorm = 0.9519, lr_0 = 6.3787e-04
Loss = 1.2481e-01, PNorm = 54.9598, GNorm = 0.7379, lr_0 = 6.3743e-04
Loss = 1.3698e-01, PNorm = 54.9754, GNorm = 0.6924, lr_0 = 6.3700e-04
Loss = 1.1663e-01, PNorm = 54.9899, GNorm = 1.0371, lr_0 = 6.3656e-04
Loss = 1.2084e-01, PNorm = 55.0044, GNorm = 0.7438, lr_0 = 6.3612e-04
Loss = 1.2801e-01, PNorm = 55.0191, GNorm = 1.0315, lr_0 = 6.3569e-04
Loss = 1.0478e-01, PNorm = 55.0283, GNorm = 0.9497, lr_0 = 6.3525e-04
Loss = 1.2896e-01, PNorm = 55.0438, GNorm = 0.6987, lr_0 = 6.3482e-04
Loss = 1.2238e-01, PNorm = 55.0623, GNorm = 0.8025, lr_0 = 6.3438e-04
Loss = 1.2054e-01, PNorm = 55.0820, GNorm = 0.9592, lr_0 = 6.3395e-04
Loss = 1.2689e-01, PNorm = 55.1027, GNorm = 1.0019, lr_0 = 6.3351e-04
Loss = 1.2105e-01, PNorm = 55.1183, GNorm = 1.1294, lr_0 = 6.3308e-04
Loss = 1.4049e-01, PNorm = 55.1380, GNorm = 0.6231, lr_0 = 6.3265e-04
Loss = 1.3746e-01, PNorm = 55.1624, GNorm = 0.6380, lr_0 = 6.3221e-04
Loss = 1.2150e-01, PNorm = 55.1829, GNorm = 0.5463, lr_0 = 6.3178e-04
Loss = 1.0521e-01, PNorm = 55.2030, GNorm = 0.5199, lr_0 = 6.3135e-04
Loss = 1.1437e-01, PNorm = 55.2194, GNorm = 0.6880, lr_0 = 6.3091e-04
Loss = 1.1337e-01, PNorm = 55.2347, GNorm = 0.8494, lr_0 = 6.3048e-04
Loss = 1.0116e-01, PNorm = 55.2446, GNorm = 0.8626, lr_0 = 6.3005e-04
Loss = 1.1826e-01, PNorm = 55.2504, GNorm = 0.8332, lr_0 = 6.2962e-04
Loss = 1.2475e-01, PNorm = 55.2583, GNorm = 0.8952, lr_0 = 6.2919e-04
Loss = 1.2757e-01, PNorm = 55.2788, GNorm = 0.9377, lr_0 = 6.2876e-04
Loss = 1.2910e-01, PNorm = 55.2987, GNorm = 1.0186, lr_0 = 6.2833e-04
Loss = 1.1594e-01, PNorm = 55.3193, GNorm = 0.8762, lr_0 = 6.2789e-04
Loss = 1.1333e-01, PNorm = 55.3364, GNorm = 0.6678, lr_0 = 6.2746e-04
Loss = 1.1266e-01, PNorm = 55.3537, GNorm = 0.6827, lr_0 = 6.2703e-04
Loss = 1.1412e-01, PNorm = 55.3608, GNorm = 1.1402, lr_0 = 6.2661e-04
Loss = 1.0386e-01, PNorm = 55.3690, GNorm = 0.8153, lr_0 = 6.2618e-04
Loss = 1.2424e-01, PNorm = 55.3843, GNorm = 1.3207, lr_0 = 6.2575e-04
Loss = 1.2297e-01, PNorm = 55.3946, GNorm = 0.6985, lr_0 = 6.2532e-04
Loss = 1.2064e-01, PNorm = 55.4070, GNorm = 0.5922, lr_0 = 6.2489e-04
Loss = 1.2602e-01, PNorm = 55.4168, GNorm = 0.7296, lr_0 = 6.2446e-04
Loss = 9.6787e-02, PNorm = 55.4277, GNorm = 0.6174, lr_0 = 6.2403e-04
Loss = 9.5814e-02, PNorm = 55.4339, GNorm = 0.6049, lr_0 = 6.2361e-04
Loss = 1.2045e-01, PNorm = 55.4432, GNorm = 0.9386, lr_0 = 6.2318e-04
Loss = 1.0876e-01, PNorm = 55.4549, GNorm = 0.6639, lr_0 = 6.2275e-04
Loss = 1.2705e-01, PNorm = 55.4667, GNorm = 0.7861, lr_0 = 6.2233e-04
Loss = 1.0085e-01, PNorm = 55.4814, GNorm = 0.6022, lr_0 = 6.2190e-04
Loss = 1.3035e-01, PNorm = 55.4932, GNorm = 1.4683, lr_0 = 6.2147e-04
Loss = 1.2051e-01, PNorm = 55.5084, GNorm = 0.7015, lr_0 = 6.2105e-04
Loss = 1.1559e-01, PNorm = 55.5252, GNorm = 1.3046, lr_0 = 6.2062e-04
Loss = 1.2816e-01, PNorm = 55.5390, GNorm = 0.6323, lr_0 = 6.2020e-04
Loss = 1.0247e-01, PNorm = 55.5616, GNorm = 1.1540, lr_0 = 6.1977e-04
Loss = 1.1644e-01, PNorm = 55.5763, GNorm = 1.5312, lr_0 = 6.1935e-04
Loss = 1.4543e-01, PNorm = 55.5882, GNorm = 1.0432, lr_0 = 6.1892e-04
Loss = 1.2306e-01, PNorm = 55.6008, GNorm = 0.7958, lr_0 = 6.1850e-04
Loss = 1.1262e-01, PNorm = 55.6187, GNorm = 1.3461, lr_0 = 6.1808e-04
Loss = 1.2456e-01, PNorm = 55.6342, GNorm = 1.0781, lr_0 = 6.1765e-04
Loss = 1.2291e-01, PNorm = 55.6493, GNorm = 1.3569, lr_0 = 6.1723e-04
Loss = 1.1451e-01, PNorm = 55.6693, GNorm = 1.1420, lr_0 = 6.1681e-04
Loss = 1.1029e-01, PNorm = 55.6900, GNorm = 1.0345, lr_0 = 6.1638e-04
Loss = 1.1591e-01, PNorm = 55.7005, GNorm = 0.5660, lr_0 = 6.1596e-04
Loss = 1.1675e-01, PNorm = 55.7136, GNorm = 1.9201, lr_0 = 6.1554e-04
Loss = 1.1239e-01, PNorm = 55.7339, GNorm = 1.3939, lr_0 = 6.1512e-04
Loss = 1.3533e-01, PNorm = 55.7526, GNorm = 0.4893, lr_0 = 6.1470e-04
Loss = 1.2747e-01, PNorm = 55.7685, GNorm = 0.9927, lr_0 = 6.1428e-04
Loss = 1.0946e-01, PNorm = 55.7849, GNorm = 0.6650, lr_0 = 6.1385e-04
Loss = 1.1050e-01, PNorm = 55.7964, GNorm = 0.8027, lr_0 = 6.1343e-04
Loss = 1.1652e-01, PNorm = 55.8112, GNorm = 0.9528, lr_0 = 6.1301e-04
Loss = 1.0311e-01, PNorm = 55.8250, GNorm = 0.6440, lr_0 = 6.1259e-04
Loss = 1.2746e-01, PNorm = 55.8353, GNorm = 0.7933, lr_0 = 6.1217e-04
Loss = 1.0336e-01, PNorm = 55.8527, GNorm = 1.3488, lr_0 = 6.1175e-04
Loss = 1.1619e-01, PNorm = 55.8631, GNorm = 1.3796, lr_0 = 6.1134e-04
Loss = 1.1459e-01, PNorm = 55.8758, GNorm = 1.3986, lr_0 = 6.1092e-04
Loss = 1.2178e-01, PNorm = 55.8908, GNorm = 1.0761, lr_0 = 6.1050e-04
Validation mae = 0.426036
Epoch 8
Loss = 1.1322e-01, PNorm = 55.9070, GNorm = 1.1516, lr_0 = 6.1008e-04
Loss = 9.0787e-02, PNorm = 55.9226, GNorm = 1.0199, lr_0 = 6.0966e-04
Loss = 1.1489e-01, PNorm = 55.9386, GNorm = 1.5904, lr_0 = 6.0924e-04
Loss = 1.2123e-01, PNorm = 55.9574, GNorm = 0.8881, lr_0 = 6.0883e-04
Loss = 1.0375e-01, PNorm = 55.9703, GNorm = 0.4788, lr_0 = 6.0841e-04
Loss = 9.8699e-02, PNorm = 55.9805, GNorm = 0.5748, lr_0 = 6.0799e-04
Loss = 1.0637e-01, PNorm = 55.9951, GNorm = 0.7674, lr_0 = 6.0758e-04
Loss = 1.1300e-01, PNorm = 56.0119, GNorm = 0.8143, lr_0 = 6.0716e-04
Loss = 1.0129e-01, PNorm = 56.0329, GNorm = 0.5515, lr_0 = 6.0674e-04
Loss = 9.9934e-02, PNorm = 56.0491, GNorm = 0.6422, lr_0 = 6.0633e-04
Loss = 1.0750e-01, PNorm = 56.0620, GNorm = 0.8817, lr_0 = 6.0591e-04
Loss = 9.1079e-02, PNorm = 56.0782, GNorm = 0.8238, lr_0 = 6.0550e-04
Loss = 1.2754e-01, PNorm = 56.0944, GNorm = 1.0724, lr_0 = 6.0508e-04
Loss = 1.0868e-01, PNorm = 56.1143, GNorm = 0.8204, lr_0 = 6.0467e-04
Loss = 1.2568e-01, PNorm = 56.1371, GNorm = 0.5338, lr_0 = 6.0425e-04
Loss = 1.2349e-01, PNorm = 56.1580, GNorm = 0.7542, lr_0 = 6.0384e-04
Loss = 1.0566e-01, PNorm = 56.1715, GNorm = 0.6289, lr_0 = 6.0343e-04
Loss = 1.0470e-01, PNorm = 56.1873, GNorm = 0.7003, lr_0 = 6.0301e-04
Loss = 1.0203e-01, PNorm = 56.1962, GNorm = 0.4704, lr_0 = 6.0260e-04
Loss = 9.5803e-02, PNorm = 56.2060, GNorm = 0.6337, lr_0 = 6.0219e-04
Loss = 1.1648e-01, PNorm = 56.2170, GNorm = 0.9311, lr_0 = 6.0178e-04
Loss = 1.1251e-01, PNorm = 56.2251, GNorm = 0.6482, lr_0 = 6.0136e-04
Loss = 1.0303e-01, PNorm = 56.2373, GNorm = 1.0424, lr_0 = 6.0095e-04
Loss = 1.1600e-01, PNorm = 56.2559, GNorm = 0.5674, lr_0 = 6.0054e-04
Loss = 1.1932e-01, PNorm = 56.2747, GNorm = 0.7939, lr_0 = 6.0013e-04
Loss = 1.1059e-01, PNorm = 56.2934, GNorm = 0.3969, lr_0 = 5.9972e-04
Loss = 1.0492e-01, PNorm = 56.3102, GNorm = 0.9696, lr_0 = 5.9931e-04
Loss = 1.0181e-01, PNorm = 56.3267, GNorm = 0.8574, lr_0 = 5.9890e-04
Loss = 1.0017e-01, PNorm = 56.3419, GNorm = 0.6914, lr_0 = 5.9849e-04
Loss = 1.1095e-01, PNorm = 56.3555, GNorm = 1.7842, lr_0 = 5.9808e-04
Loss = 1.1464e-01, PNorm = 56.3688, GNorm = 0.9684, lr_0 = 5.9767e-04
Loss = 1.3052e-01, PNorm = 56.3843, GNorm = 1.0854, lr_0 = 5.9726e-04
Loss = 1.1177e-01, PNorm = 56.3988, GNorm = 1.1602, lr_0 = 5.9685e-04
Loss = 1.2224e-01, PNorm = 56.4170, GNorm = 1.0888, lr_0 = 5.9644e-04
Loss = 1.1611e-01, PNorm = 56.4344, GNorm = 1.0201, lr_0 = 5.9603e-04
Loss = 1.1908e-01, PNorm = 56.4466, GNorm = 1.1725, lr_0 = 5.9562e-04
Loss = 1.0576e-01, PNorm = 56.4584, GNorm = 1.1232, lr_0 = 5.9521e-04
Loss = 9.9928e-02, PNorm = 56.4748, GNorm = 0.6637, lr_0 = 5.9481e-04
Loss = 9.5268e-02, PNorm = 56.4863, GNorm = 0.8791, lr_0 = 5.9440e-04
Loss = 1.0932e-01, PNorm = 56.4978, GNorm = 0.8438, lr_0 = 5.9399e-04
Loss = 9.9084e-02, PNorm = 56.5083, GNorm = 0.4982, lr_0 = 5.9358e-04
Loss = 9.9916e-02, PNorm = 56.5193, GNorm = 0.6299, lr_0 = 5.9318e-04
Loss = 1.0427e-01, PNorm = 56.5305, GNorm = 0.9950, lr_0 = 5.9277e-04
Loss = 8.6232e-02, PNorm = 56.5415, GNorm = 0.7267, lr_0 = 5.9236e-04
Loss = 1.0325e-01, PNorm = 56.5514, GNorm = 0.8081, lr_0 = 5.9196e-04
Loss = 1.0628e-01, PNorm = 56.5636, GNorm = 0.5577, lr_0 = 5.9155e-04
Loss = 1.0151e-01, PNorm = 56.5796, GNorm = 1.2866, lr_0 = 5.9115e-04
Loss = 1.0394e-01, PNorm = 56.5940, GNorm = 0.8031, lr_0 = 5.9074e-04
Loss = 1.1274e-01, PNorm = 56.6080, GNorm = 0.9102, lr_0 = 5.9034e-04
Loss = 9.4325e-02, PNorm = 56.6182, GNorm = 0.6443, lr_0 = 5.8993e-04
Loss = 1.2352e-01, PNorm = 56.6328, GNorm = 1.5546, lr_0 = 5.8953e-04
Loss = 8.4866e-02, PNorm = 56.6471, GNorm = 1.0191, lr_0 = 5.8913e-04
Loss = 1.0835e-01, PNorm = 56.6592, GNorm = 0.6022, lr_0 = 5.8872e-04
Loss = 9.0738e-02, PNorm = 56.6771, GNorm = 0.6308, lr_0 = 5.8832e-04
Loss = 1.1439e-01, PNorm = 56.6966, GNorm = 1.2139, lr_0 = 5.8792e-04
Loss = 1.2012e-01, PNorm = 56.7155, GNorm = 1.8034, lr_0 = 5.8751e-04
Loss = 1.2858e-01, PNorm = 56.7356, GNorm = 0.6297, lr_0 = 5.8711e-04
Loss = 1.0900e-01, PNorm = 56.7469, GNorm = 0.6124, lr_0 = 5.8671e-04
Loss = 1.0657e-01, PNorm = 56.7595, GNorm = 0.7074, lr_0 = 5.8631e-04
Loss = 1.0202e-01, PNorm = 56.7761, GNorm = 0.7133, lr_0 = 5.8591e-04
Loss = 1.0681e-01, PNorm = 56.7903, GNorm = 1.1631, lr_0 = 5.8550e-04
Loss = 9.4203e-02, PNorm = 56.8095, GNorm = 1.0093, lr_0 = 5.8510e-04
Loss = 1.0203e-01, PNorm = 56.8223, GNorm = 1.0532, lr_0 = 5.8470e-04
Loss = 1.0952e-01, PNorm = 56.8367, GNorm = 0.7353, lr_0 = 5.8430e-04
Loss = 9.5227e-02, PNorm = 56.8455, GNorm = 1.1773, lr_0 = 5.8390e-04
Loss = 9.7239e-02, PNorm = 56.8492, GNorm = 0.9591, lr_0 = 5.8350e-04
Loss = 1.1787e-01, PNorm = 56.8558, GNorm = 1.6967, lr_0 = 5.8310e-04
Loss = 1.0452e-01, PNorm = 56.8693, GNorm = 0.5693, lr_0 = 5.8270e-04
Loss = 9.9017e-02, PNorm = 56.8861, GNorm = 0.8049, lr_0 = 5.8230e-04
Loss = 1.1759e-01, PNorm = 56.9030, GNorm = 1.0598, lr_0 = 5.8190e-04
Loss = 1.1383e-01, PNorm = 56.9152, GNorm = 1.0322, lr_0 = 5.8151e-04
Loss = 1.1695e-01, PNorm = 56.9229, GNorm = 0.9141, lr_0 = 5.8111e-04
Loss = 1.0628e-01, PNorm = 56.9346, GNorm = 0.8677, lr_0 = 5.8071e-04
Loss = 1.1267e-01, PNorm = 56.9486, GNorm = 1.2151, lr_0 = 5.8031e-04
Loss = 1.1732e-01, PNorm = 56.9613, GNorm = 0.7860, lr_0 = 5.7991e-04
Loss = 1.0582e-01, PNorm = 56.9744, GNorm = 0.6357, lr_0 = 5.7952e-04
Loss = 1.0818e-01, PNorm = 56.9858, GNorm = 0.6975, lr_0 = 5.7912e-04
Loss = 1.0071e-01, PNorm = 57.0016, GNorm = 0.9404, lr_0 = 5.7872e-04
Loss = 1.1675e-01, PNorm = 57.0149, GNorm = 0.7590, lr_0 = 5.7833e-04
Loss = 1.1047e-01, PNorm = 57.0231, GNorm = 0.9944, lr_0 = 5.7793e-04
Loss = 1.0891e-01, PNorm = 57.0306, GNorm = 0.6549, lr_0 = 5.7753e-04
Loss = 1.0386e-01, PNorm = 57.0393, GNorm = 0.6111, lr_0 = 5.7714e-04
Loss = 1.1456e-01, PNorm = 57.0551, GNorm = 0.7117, lr_0 = 5.7674e-04
Loss = 1.0972e-01, PNorm = 57.0656, GNorm = 0.7422, lr_0 = 5.7635e-04
Loss = 1.0496e-01, PNorm = 57.0776, GNorm = 0.9531, lr_0 = 5.7595e-04
Loss = 1.1656e-01, PNorm = 57.0943, GNorm = 1.1394, lr_0 = 5.7556e-04
Loss = 1.0573e-01, PNorm = 57.1076, GNorm = 0.6020, lr_0 = 5.7516e-04
Loss = 1.1808e-01, PNorm = 57.1180, GNorm = 1.5915, lr_0 = 5.7477e-04
Loss = 1.1908e-01, PNorm = 57.1321, GNorm = 0.8299, lr_0 = 5.7438e-04
Loss = 1.0620e-01, PNorm = 57.1429, GNorm = 0.7274, lr_0 = 5.7398e-04
Loss = 1.1233e-01, PNorm = 57.1589, GNorm = 0.4895, lr_0 = 5.7359e-04
Loss = 1.0316e-01, PNorm = 57.1718, GNorm = 1.0049, lr_0 = 5.7320e-04
Loss = 1.0331e-01, PNorm = 57.1819, GNorm = 0.9206, lr_0 = 5.7280e-04
Loss = 1.1886e-01, PNorm = 57.1933, GNorm = 0.5957, lr_0 = 5.7241e-04
Loss = 1.1780e-01, PNorm = 57.2060, GNorm = 1.0944, lr_0 = 5.7202e-04
Loss = 1.0486e-01, PNorm = 57.2147, GNorm = 0.8468, lr_0 = 5.7163e-04
Loss = 1.1918e-01, PNorm = 57.2303, GNorm = 1.3944, lr_0 = 5.7124e-04
Loss = 1.0427e-01, PNorm = 57.2462, GNorm = 0.8838, lr_0 = 5.7084e-04
Loss = 9.1358e-02, PNorm = 57.2608, GNorm = 0.4641, lr_0 = 5.7045e-04
Loss = 1.0799e-01, PNorm = 57.2746, GNorm = 0.4829, lr_0 = 5.7006e-04
Loss = 1.1696e-01, PNorm = 57.2842, GNorm = 1.0767, lr_0 = 5.6967e-04
Loss = 9.7440e-02, PNorm = 57.2957, GNorm = 0.6378, lr_0 = 5.6928e-04
Loss = 1.0449e-01, PNorm = 57.3113, GNorm = 0.7947, lr_0 = 5.6889e-04
Loss = 9.8027e-02, PNorm = 57.3206, GNorm = 0.4801, lr_0 = 5.6850e-04
Loss = 1.2412e-01, PNorm = 57.3264, GNorm = 1.6327, lr_0 = 5.6811e-04
Loss = 1.2626e-01, PNorm = 57.3392, GNorm = 0.8194, lr_0 = 5.6772e-04
Loss = 1.3129e-01, PNorm = 57.3581, GNorm = 0.8186, lr_0 = 5.6733e-04
Loss = 1.0185e-01, PNorm = 57.3745, GNorm = 0.5341, lr_0 = 5.6695e-04
Loss = 1.0796e-01, PNorm = 57.3874, GNorm = 0.5746, lr_0 = 5.6656e-04
Loss = 1.4019e-01, PNorm = 57.4007, GNorm = 0.6942, lr_0 = 5.6617e-04
Loss = 1.2765e-01, PNorm = 57.4225, GNorm = 1.8114, lr_0 = 5.6578e-04
Loss = 1.3530e-01, PNorm = 57.4490, GNorm = 0.6757, lr_0 = 5.6539e-04
Loss = 1.1017e-01, PNorm = 57.4689, GNorm = 0.7036, lr_0 = 5.6501e-04
Loss = 1.1092e-01, PNorm = 57.4825, GNorm = 0.7970, lr_0 = 5.6462e-04
Loss = 1.0889e-01, PNorm = 57.4980, GNorm = 0.9449, lr_0 = 5.6423e-04
Loss = 1.1729e-01, PNorm = 57.5103, GNorm = 1.1459, lr_0 = 5.6385e-04
Loss = 1.1775e-01, PNorm = 57.5232, GNorm = 1.4646, lr_0 = 5.6346e-04
Loss = 1.0409e-01, PNorm = 57.5356, GNorm = 0.6536, lr_0 = 5.6307e-04
Loss = 1.0581e-01, PNorm = 57.5483, GNorm = 0.8921, lr_0 = 5.6269e-04
Loss = 9.6320e-02, PNorm = 57.5557, GNorm = 1.0808, lr_0 = 5.6230e-04
Validation mae = 0.424912
Epoch 9
Loss = 9.9524e-02, PNorm = 57.5679, GNorm = 0.7238, lr_0 = 5.6192e-04
Loss = 1.0221e-01, PNorm = 57.5830, GNorm = 0.7196, lr_0 = 5.6153e-04
Loss = 8.7319e-02, PNorm = 57.5982, GNorm = 0.5101, lr_0 = 5.6115e-04
Loss = 1.0175e-01, PNorm = 57.6081, GNorm = 1.6472, lr_0 = 5.6076e-04
Loss = 8.7406e-02, PNorm = 57.6272, GNorm = 0.5846, lr_0 = 5.6038e-04
Loss = 9.6502e-02, PNorm = 57.6434, GNorm = 0.5446, lr_0 = 5.6000e-04
Loss = 9.5751e-02, PNorm = 57.6583, GNorm = 1.0490, lr_0 = 5.5961e-04
Loss = 8.5699e-02, PNorm = 57.6696, GNorm = 1.0699, lr_0 = 5.5923e-04
Loss = 8.8379e-02, PNorm = 57.6790, GNorm = 0.5716, lr_0 = 5.5885e-04
Loss = 1.0341e-01, PNorm = 57.6912, GNorm = 1.1225, lr_0 = 5.5846e-04
Loss = 9.5780e-02, PNorm = 57.7046, GNorm = 0.8390, lr_0 = 5.5808e-04
Loss = 8.8940e-02, PNorm = 57.7159, GNorm = 0.6016, lr_0 = 5.5770e-04
Loss = 9.2518e-02, PNorm = 57.7343, GNorm = 0.4314, lr_0 = 5.5732e-04
Loss = 8.6955e-02, PNorm = 57.7454, GNorm = 1.4860, lr_0 = 5.5693e-04
Loss = 9.3105e-02, PNorm = 57.7575, GNorm = 0.6512, lr_0 = 5.5655e-04
Loss = 1.0338e-01, PNorm = 57.7706, GNorm = 0.6864, lr_0 = 5.5617e-04
Loss = 9.9222e-02, PNorm = 57.7821, GNorm = 0.4857, lr_0 = 5.5579e-04
Loss = 9.7557e-02, PNorm = 57.7936, GNorm = 0.7114, lr_0 = 5.5541e-04
Loss = 1.0693e-01, PNorm = 57.8061, GNorm = 0.6263, lr_0 = 5.5503e-04
Loss = 1.2227e-01, PNorm = 57.8252, GNorm = 1.6233, lr_0 = 5.5465e-04
Loss = 9.9292e-02, PNorm = 57.8433, GNorm = 1.5791, lr_0 = 5.5427e-04
Loss = 9.7363e-02, PNorm = 57.8575, GNorm = 1.5823, lr_0 = 5.5389e-04
Loss = 1.1153e-01, PNorm = 57.8768, GNorm = 0.7245, lr_0 = 5.5351e-04
Loss = 1.0966e-01, PNorm = 57.8977, GNorm = 0.9866, lr_0 = 5.5313e-04
Loss = 1.1184e-01, PNorm = 57.9104, GNorm = 0.8255, lr_0 = 5.5275e-04
Loss = 9.8709e-02, PNorm = 57.9236, GNorm = 1.1693, lr_0 = 5.5237e-04
Loss = 1.0174e-01, PNorm = 57.9332, GNorm = 0.5845, lr_0 = 5.5199e-04
Loss = 9.2607e-02, PNorm = 57.9437, GNorm = 0.4481, lr_0 = 5.5162e-04
Loss = 1.0477e-01, PNorm = 57.9563, GNorm = 0.5153, lr_0 = 5.5124e-04
Loss = 1.1810e-01, PNorm = 57.9705, GNorm = 0.6084, lr_0 = 5.5086e-04
Loss = 1.1130e-01, PNorm = 57.9836, GNorm = 0.5680, lr_0 = 5.5048e-04
Loss = 9.9919e-02, PNorm = 57.9956, GNorm = 0.4514, lr_0 = 5.5011e-04
Loss = 1.0513e-01, PNorm = 58.0119, GNorm = 0.7754, lr_0 = 5.4973e-04
Loss = 1.0036e-01, PNorm = 58.0248, GNorm = 0.8346, lr_0 = 5.4935e-04
Loss = 1.0879e-01, PNorm = 58.0413, GNorm = 0.8727, lr_0 = 5.4898e-04
Loss = 1.0292e-01, PNorm = 58.0568, GNorm = 1.6353, lr_0 = 5.4860e-04
Loss = 1.0756e-01, PNorm = 58.0710, GNorm = 0.7061, lr_0 = 5.4822e-04
Loss = 1.0875e-01, PNorm = 58.0818, GNorm = 0.9500, lr_0 = 5.4785e-04
Loss = 1.2926e-01, PNorm = 58.0952, GNorm = 1.3932, lr_0 = 5.4747e-04
Loss = 1.1394e-01, PNorm = 58.1118, GNorm = 1.0387, lr_0 = 5.4710e-04
Loss = 9.4178e-02, PNorm = 58.1276, GNorm = 0.9458, lr_0 = 5.4672e-04
Loss = 1.0575e-01, PNorm = 58.1392, GNorm = 1.1920, lr_0 = 5.4635e-04
Loss = 1.0527e-01, PNorm = 58.1505, GNorm = 0.4565, lr_0 = 5.4597e-04
Loss = 1.0901e-01, PNorm = 58.1630, GNorm = 1.1227, lr_0 = 5.4560e-04
Loss = 9.7880e-02, PNorm = 58.1762, GNorm = 1.1496, lr_0 = 5.4523e-04
Loss = 1.0093e-01, PNorm = 58.1936, GNorm = 1.4332, lr_0 = 5.4485e-04
Loss = 1.0196e-01, PNorm = 58.2099, GNorm = 1.4173, lr_0 = 5.4448e-04
Loss = 1.0843e-01, PNorm = 58.2231, GNorm = 0.7702, lr_0 = 5.4411e-04
Loss = 1.0162e-01, PNorm = 58.2378, GNorm = 0.6436, lr_0 = 5.4373e-04
Loss = 8.5492e-02, PNorm = 58.2498, GNorm = 1.1941, lr_0 = 5.4336e-04
Loss = 1.0470e-01, PNorm = 58.2600, GNorm = 0.6746, lr_0 = 5.4299e-04
Loss = 9.7499e-02, PNorm = 58.2734, GNorm = 0.6471, lr_0 = 5.4262e-04
Loss = 1.1215e-01, PNorm = 58.2864, GNorm = 1.3854, lr_0 = 5.4225e-04
Loss = 9.0536e-02, PNorm = 58.2999, GNorm = 0.7749, lr_0 = 5.4187e-04
Loss = 9.1785e-02, PNorm = 58.3096, GNorm = 0.5672, lr_0 = 5.4150e-04
Loss = 1.1570e-01, PNorm = 58.3278, GNorm = 1.4112, lr_0 = 5.4113e-04
Loss = 1.0591e-01, PNorm = 58.3501, GNorm = 0.9885, lr_0 = 5.4076e-04
Loss = 1.0691e-01, PNorm = 58.3671, GNorm = 0.8582, lr_0 = 5.4039e-04
Loss = 1.0847e-01, PNorm = 58.3807, GNorm = 0.7510, lr_0 = 5.4002e-04
Loss = 1.1725e-01, PNorm = 58.3980, GNorm = 0.9841, lr_0 = 5.3965e-04
Loss = 9.1750e-02, PNorm = 58.4102, GNorm = 1.0008, lr_0 = 5.3928e-04
Loss = 1.0957e-01, PNorm = 58.4218, GNorm = 0.6879, lr_0 = 5.3891e-04
Loss = 1.1017e-01, PNorm = 58.4359, GNorm = 0.6916, lr_0 = 5.3854e-04
Loss = 8.5579e-02, PNorm = 58.4461, GNorm = 0.7704, lr_0 = 5.3817e-04
Loss = 8.2706e-02, PNorm = 58.4537, GNorm = 0.8183, lr_0 = 5.3781e-04
Loss = 1.0920e-01, PNorm = 58.4697, GNorm = 0.7379, lr_0 = 5.3744e-04
Loss = 1.0209e-01, PNorm = 58.4747, GNorm = 0.9255, lr_0 = 5.3707e-04
Loss = 1.0328e-01, PNorm = 58.4808, GNorm = 0.6117, lr_0 = 5.3670e-04
Loss = 1.0114e-01, PNorm = 58.4963, GNorm = 1.6801, lr_0 = 5.3633e-04
Loss = 1.0801e-01, PNorm = 58.5112, GNorm = 0.8563, lr_0 = 5.3597e-04
Loss = 9.6791e-02, PNorm = 58.5210, GNorm = 0.5589, lr_0 = 5.3560e-04
Loss = 1.0970e-01, PNorm = 58.5331, GNorm = 0.9448, lr_0 = 5.3523e-04
Loss = 9.6323e-02, PNorm = 58.5503, GNorm = 0.6217, lr_0 = 5.3486e-04
Loss = 9.7592e-02, PNorm = 58.5640, GNorm = 1.7040, lr_0 = 5.3450e-04
Loss = 9.8213e-02, PNorm = 58.5726, GNorm = 0.6867, lr_0 = 5.3413e-04
Loss = 7.7828e-02, PNorm = 58.5813, GNorm = 0.5213, lr_0 = 5.3377e-04
Loss = 1.0605e-01, PNorm = 58.5920, GNorm = 0.5865, lr_0 = 5.3340e-04
Loss = 1.1154e-01, PNorm = 58.6006, GNorm = 0.8737, lr_0 = 5.3304e-04
Loss = 1.0641e-01, PNorm = 58.6106, GNorm = 0.5050, lr_0 = 5.3267e-04
Loss = 1.1158e-01, PNorm = 58.6262, GNorm = 0.5137, lr_0 = 5.3231e-04
Loss = 9.5436e-02, PNorm = 58.6390, GNorm = 0.6509, lr_0 = 5.3194e-04
Loss = 1.1475e-01, PNorm = 58.6472, GNorm = 1.5840, lr_0 = 5.3158e-04
Loss = 1.3134e-01, PNorm = 58.6586, GNorm = 1.4194, lr_0 = 5.3121e-04
Loss = 1.1007e-01, PNorm = 58.6740, GNorm = 0.7308, lr_0 = 5.3085e-04
Loss = 1.0184e-01, PNorm = 58.6886, GNorm = 0.4656, lr_0 = 5.3048e-04
Loss = 1.0156e-01, PNorm = 58.7027, GNorm = 0.5078, lr_0 = 5.3012e-04
Loss = 9.7375e-02, PNorm = 58.7162, GNorm = 0.6990, lr_0 = 5.2976e-04
Loss = 1.1395e-01, PNorm = 58.7271, GNorm = 0.9084, lr_0 = 5.2939e-04
Loss = 9.0219e-02, PNorm = 58.7396, GNorm = 0.9470, lr_0 = 5.2903e-04
Loss = 1.0382e-01, PNorm = 58.7524, GNorm = 1.4844, lr_0 = 5.2867e-04
Loss = 1.0464e-01, PNorm = 58.7657, GNorm = 0.7175, lr_0 = 5.2831e-04
Loss = 1.0021e-01, PNorm = 58.7714, GNorm = 0.7887, lr_0 = 5.2795e-04
Loss = 1.0458e-01, PNorm = 58.7802, GNorm = 0.6851, lr_0 = 5.2758e-04
Loss = 9.5105e-02, PNorm = 58.7954, GNorm = 0.8837, lr_0 = 5.2722e-04
Loss = 9.7679e-02, PNorm = 58.8117, GNorm = 0.7750, lr_0 = 5.2686e-04
Loss = 1.1004e-01, PNorm = 58.8281, GNorm = 0.6345, lr_0 = 5.2650e-04
Loss = 1.1261e-01, PNorm = 58.8469, GNorm = 0.6481, lr_0 = 5.2614e-04
Loss = 1.0008e-01, PNorm = 58.8620, GNorm = 0.8408, lr_0 = 5.2578e-04
Loss = 9.3271e-02, PNorm = 58.8746, GNorm = 0.6065, lr_0 = 5.2542e-04
Loss = 1.0798e-01, PNorm = 58.8871, GNorm = 0.6950, lr_0 = 5.2506e-04
Loss = 8.8838e-02, PNorm = 58.9012, GNorm = 0.7826, lr_0 = 5.2470e-04
Loss = 9.8779e-02, PNorm = 58.9190, GNorm = 0.8205, lr_0 = 5.2434e-04
Loss = 1.0023e-01, PNorm = 58.9327, GNorm = 0.7644, lr_0 = 5.2398e-04
Loss = 9.1976e-02, PNorm = 58.9446, GNorm = 1.4388, lr_0 = 5.2362e-04
Loss = 1.1171e-01, PNorm = 58.9591, GNorm = 0.5964, lr_0 = 5.2326e-04
Loss = 9.5709e-02, PNorm = 58.9712, GNorm = 0.8908, lr_0 = 5.2290e-04
Loss = 9.2183e-02, PNorm = 58.9841, GNorm = 0.5130, lr_0 = 5.2255e-04
Loss = 1.0991e-01, PNorm = 58.9906, GNorm = 0.6350, lr_0 = 5.2219e-04
Loss = 1.2463e-01, PNorm = 59.0035, GNorm = 0.6644, lr_0 = 5.2183e-04
Loss = 9.1779e-02, PNorm = 59.0210, GNorm = 0.4937, lr_0 = 5.2147e-04
Loss = 1.0223e-01, PNorm = 59.0339, GNorm = 0.6175, lr_0 = 5.2112e-04
Loss = 1.2536e-01, PNorm = 59.0425, GNorm = 0.9317, lr_0 = 5.2076e-04
Loss = 1.0563e-01, PNorm = 59.0533, GNorm = 0.7748, lr_0 = 5.2040e-04
Loss = 9.4741e-02, PNorm = 59.0601, GNorm = 0.5714, lr_0 = 5.2005e-04
Loss = 9.9540e-02, PNorm = 59.0633, GNorm = 0.5707, lr_0 = 5.1969e-04
Loss = 1.0393e-01, PNorm = 59.0703, GNorm = 0.6168, lr_0 = 5.1933e-04
Loss = 1.0633e-01, PNorm = 59.0849, GNorm = 0.6911, lr_0 = 5.1898e-04
Loss = 1.0887e-01, PNorm = 59.1012, GNorm = 1.5538, lr_0 = 5.1862e-04
Loss = 1.0357e-01, PNorm = 59.1110, GNorm = 0.7935, lr_0 = 5.1827e-04
Loss = 1.0810e-01, PNorm = 59.1240, GNorm = 1.2584, lr_0 = 5.1791e-04
Validation mae = 0.417008
Epoch 10
Loss = 9.0331e-02, PNorm = 59.1356, GNorm = 0.9665, lr_0 = 5.1756e-04
Loss = 9.3885e-02, PNorm = 59.1507, GNorm = 1.0320, lr_0 = 5.1720e-04
Loss = 8.4929e-02, PNorm = 59.1652, GNorm = 0.6467, lr_0 = 5.1685e-04
Loss = 9.3892e-02, PNorm = 59.1829, GNorm = 1.0115, lr_0 = 5.1649e-04
Loss = 9.6203e-02, PNorm = 59.1968, GNorm = 0.6850, lr_0 = 5.1614e-04
Loss = 8.4349e-02, PNorm = 59.2076, GNorm = 0.8045, lr_0 = 5.1579e-04
Loss = 9.3030e-02, PNorm = 59.2169, GNorm = 0.7454, lr_0 = 5.1543e-04
Loss = 9.7590e-02, PNorm = 59.2250, GNorm = 0.9944, lr_0 = 5.1508e-04
Loss = 8.6886e-02, PNorm = 59.2372, GNorm = 0.9507, lr_0 = 5.1473e-04
Loss = 8.3796e-02, PNorm = 59.2508, GNorm = 1.1548, lr_0 = 5.1437e-04
Loss = 8.9337e-02, PNorm = 59.2618, GNorm = 0.5884, lr_0 = 5.1402e-04
Loss = 9.3171e-02, PNorm = 59.2731, GNorm = 0.5988, lr_0 = 5.1367e-04
Loss = 9.4537e-02, PNorm = 59.2851, GNorm = 0.5616, lr_0 = 5.1332e-04
Loss = 9.9402e-02, PNorm = 59.2977, GNorm = 0.7241, lr_0 = 5.1297e-04
Loss = 8.6044e-02, PNorm = 59.3106, GNorm = 0.5422, lr_0 = 5.1262e-04
Loss = 9.5159e-02, PNorm = 59.3205, GNorm = 0.6042, lr_0 = 5.1226e-04
Loss = 8.2072e-02, PNorm = 59.3326, GNorm = 0.5233, lr_0 = 5.1191e-04
Loss = 8.9764e-02, PNorm = 59.3452, GNorm = 0.5259, lr_0 = 5.1156e-04
Loss = 9.6209e-02, PNorm = 59.3623, GNorm = 0.7935, lr_0 = 5.1121e-04
Loss = 9.9319e-02, PNorm = 59.3752, GNorm = 1.3231, lr_0 = 5.1086e-04
Loss = 9.8258e-02, PNorm = 59.3849, GNorm = 1.1316, lr_0 = 5.1051e-04
Loss = 9.0090e-02, PNorm = 59.3966, GNorm = 0.6142, lr_0 = 5.1016e-04
Loss = 8.8051e-02, PNorm = 59.4162, GNorm = 0.9183, lr_0 = 5.0981e-04
Loss = 8.3121e-02, PNorm = 59.4297, GNorm = 0.6010, lr_0 = 5.0946e-04
Loss = 8.4665e-02, PNorm = 59.4390, GNorm = 0.8701, lr_0 = 5.0911e-04
Loss = 9.4078e-02, PNorm = 59.4460, GNorm = 1.3288, lr_0 = 5.0877e-04
Loss = 9.6927e-02, PNorm = 59.4569, GNorm = 0.5404, lr_0 = 5.0842e-04
Loss = 8.6490e-02, PNorm = 59.4681, GNorm = 0.5970, lr_0 = 5.0807e-04
Loss = 1.0170e-01, PNorm = 59.4796, GNorm = 0.8479, lr_0 = 5.0772e-04
Loss = 8.6510e-02, PNorm = 59.4916, GNorm = 0.8242, lr_0 = 5.0737e-04
Loss = 9.0163e-02, PNorm = 59.5075, GNorm = 0.7930, lr_0 = 5.0703e-04
Loss = 8.3692e-02, PNorm = 59.5174, GNorm = 0.9281, lr_0 = 5.0668e-04
Loss = 8.5443e-02, PNorm = 59.5279, GNorm = 0.4417, lr_0 = 5.0633e-04
Loss = 9.8531e-02, PNorm = 59.5359, GNorm = 1.0006, lr_0 = 5.0598e-04
Loss = 9.4420e-02, PNorm = 59.5443, GNorm = 0.7638, lr_0 = 5.0564e-04
Loss = 8.7810e-02, PNorm = 59.5568, GNorm = 0.9214, lr_0 = 5.0529e-04
Loss = 1.0056e-01, PNorm = 59.5698, GNorm = 0.8768, lr_0 = 5.0494e-04
Loss = 9.2410e-02, PNorm = 59.5826, GNorm = 0.5692, lr_0 = 5.0460e-04
Loss = 8.7119e-02, PNorm = 59.5946, GNorm = 0.6341, lr_0 = 5.0425e-04
Loss = 9.3279e-02, PNorm = 59.6063, GNorm = 0.8745, lr_0 = 5.0391e-04
Loss = 8.7311e-02, PNorm = 59.6178, GNorm = 0.7847, lr_0 = 5.0356e-04
Loss = 9.9955e-02, PNorm = 59.6264, GNorm = 0.6317, lr_0 = 5.0322e-04
Loss = 8.3172e-02, PNorm = 59.6395, GNorm = 0.8356, lr_0 = 5.0287e-04
Loss = 8.3269e-02, PNorm = 59.6526, GNorm = 0.6205, lr_0 = 5.0253e-04
Loss = 8.4823e-02, PNorm = 59.6643, GNorm = 0.8315, lr_0 = 5.0218e-04
Loss = 9.5149e-02, PNorm = 59.6782, GNorm = 0.6202, lr_0 = 5.0184e-04
Loss = 9.5422e-02, PNorm = 59.6866, GNorm = 0.7840, lr_0 = 5.0150e-04
Loss = 8.3777e-02, PNorm = 59.6983, GNorm = 0.8245, lr_0 = 5.0115e-04
Loss = 1.0132e-01, PNorm = 59.7123, GNorm = 0.5687, lr_0 = 5.0081e-04
Loss = 9.3470e-02, PNorm = 59.7229, GNorm = 0.6287, lr_0 = 5.0047e-04
Loss = 9.5739e-02, PNorm = 59.7292, GNorm = 0.7285, lr_0 = 5.0012e-04
Loss = 9.1465e-02, PNorm = 59.7404, GNorm = 1.0574, lr_0 = 4.9978e-04
Loss = 9.0398e-02, PNorm = 59.7508, GNorm = 0.9196, lr_0 = 4.9944e-04
Loss = 9.1207e-02, PNorm = 59.7628, GNorm = 0.7534, lr_0 = 4.9910e-04
Loss = 9.1391e-02, PNorm = 59.7719, GNorm = 1.2565, lr_0 = 4.9875e-04
Loss = 8.9201e-02, PNorm = 59.7859, GNorm = 0.5123, lr_0 = 4.9841e-04
Loss = 9.5046e-02, PNorm = 59.7955, GNorm = 0.6654, lr_0 = 4.9807e-04
Loss = 9.6519e-02, PNorm = 59.8058, GNorm = 0.6730, lr_0 = 4.9773e-04
Loss = 8.5469e-02, PNorm = 59.8169, GNorm = 0.5918, lr_0 = 4.9739e-04
Loss = 8.9380e-02, PNorm = 59.8242, GNorm = 0.5543, lr_0 = 4.9705e-04
Loss = 8.3536e-02, PNorm = 59.8327, GNorm = 0.8564, lr_0 = 4.9671e-04
Loss = 9.2014e-02, PNorm = 59.8424, GNorm = 0.8089, lr_0 = 4.9637e-04
Loss = 1.0223e-01, PNorm = 59.8564, GNorm = 0.7295, lr_0 = 4.9603e-04
Loss = 8.7459e-02, PNorm = 59.8647, GNorm = 1.1101, lr_0 = 4.9569e-04
Loss = 9.6213e-02, PNorm = 59.8744, GNorm = 1.0849, lr_0 = 4.9535e-04
Loss = 8.9435e-02, PNorm = 59.8832, GNorm = 1.0702, lr_0 = 4.9501e-04
Loss = 9.6216e-02, PNorm = 59.8955, GNorm = 0.8243, lr_0 = 4.9467e-04
Loss = 1.0246e-01, PNorm = 59.9123, GNorm = 0.9406, lr_0 = 4.9433e-04
Loss = 8.7274e-02, PNorm = 59.9244, GNorm = 0.6652, lr_0 = 4.9399e-04
Loss = 8.2082e-02, PNorm = 59.9335, GNorm = 0.7032, lr_0 = 4.9365e-04
Loss = 1.0620e-01, PNorm = 59.9440, GNorm = 0.6225, lr_0 = 4.9332e-04
Loss = 1.0026e-01, PNorm = 59.9542, GNorm = 0.4738, lr_0 = 4.9298e-04
Loss = 1.0189e-01, PNorm = 59.9668, GNorm = 0.7661, lr_0 = 4.9264e-04
Loss = 8.7070e-02, PNorm = 59.9754, GNorm = 0.6792, lr_0 = 4.9230e-04
Loss = 1.0240e-01, PNorm = 59.9884, GNorm = 1.3146, lr_0 = 4.9197e-04
Loss = 1.0579e-01, PNorm = 59.9977, GNorm = 1.2223, lr_0 = 4.9163e-04
Loss = 1.0130e-01, PNorm = 60.0097, GNorm = 0.6548, lr_0 = 4.9129e-04
Loss = 8.5145e-02, PNorm = 60.0197, GNorm = 0.4835, lr_0 = 4.9095e-04
Loss = 1.2011e-01, PNorm = 60.0353, GNorm = 0.9135, lr_0 = 4.9062e-04
Loss = 9.2793e-02, PNorm = 60.0536, GNorm = 0.8436, lr_0 = 4.9028e-04
Loss = 9.0167e-02, PNorm = 60.0646, GNorm = 0.5577, lr_0 = 4.8995e-04
Loss = 9.3337e-02, PNorm = 60.0709, GNorm = 0.5307, lr_0 = 4.8961e-04
Loss = 9.9840e-02, PNorm = 60.0817, GNorm = 0.5170, lr_0 = 4.8928e-04
Loss = 1.0219e-01, PNorm = 60.0963, GNorm = 0.8053, lr_0 = 4.8894e-04
Loss = 9.9369e-02, PNorm = 60.1101, GNorm = 0.5589, lr_0 = 4.8861e-04
Loss = 9.1908e-02, PNorm = 60.1214, GNorm = 0.6013, lr_0 = 4.8827e-04
Loss = 7.7610e-02, PNorm = 60.1314, GNorm = 0.6077, lr_0 = 4.8794e-04
Loss = 1.0781e-01, PNorm = 60.1402, GNorm = 0.5798, lr_0 = 4.8760e-04
Loss = 9.6377e-02, PNorm = 60.1496, GNorm = 0.9033, lr_0 = 4.8727e-04
Loss = 9.9511e-02, PNorm = 60.1612, GNorm = 1.0636, lr_0 = 4.8693e-04
Loss = 1.1205e-01, PNorm = 60.1731, GNorm = 1.2699, lr_0 = 4.8660e-04
Loss = 1.1553e-01, PNorm = 60.1861, GNorm = 0.7631, lr_0 = 4.8627e-04
Loss = 1.0093e-01, PNorm = 60.1979, GNorm = 1.1466, lr_0 = 4.8593e-04
Loss = 8.9763e-02, PNorm = 60.2081, GNorm = 0.6077, lr_0 = 4.8560e-04
Loss = 1.0205e-01, PNorm = 60.2162, GNorm = 0.6152, lr_0 = 4.8527e-04
Loss = 1.1100e-01, PNorm = 60.2278, GNorm = 0.9669, lr_0 = 4.8494e-04
Loss = 9.9861e-02, PNorm = 60.2373, GNorm = 0.6818, lr_0 = 4.8460e-04
Loss = 9.1426e-02, PNorm = 60.2488, GNorm = 0.5820, lr_0 = 4.8427e-04
Loss = 1.1213e-01, PNorm = 60.2579, GNorm = 1.1399, lr_0 = 4.8394e-04
Loss = 1.0362e-01, PNorm = 60.2743, GNorm = 1.1303, lr_0 = 4.8361e-04
Loss = 9.5619e-02, PNorm = 60.2926, GNorm = 0.8359, lr_0 = 4.8328e-04
Loss = 8.5920e-02, PNorm = 60.3064, GNorm = 0.5582, lr_0 = 4.8295e-04
Loss = 1.0004e-01, PNorm = 60.3180, GNorm = 0.5795, lr_0 = 4.8262e-04
Loss = 9.8059e-02, PNorm = 60.3271, GNorm = 0.5004, lr_0 = 4.8228e-04
Loss = 9.7598e-02, PNorm = 60.3358, GNorm = 0.6086, lr_0 = 4.8195e-04
Loss = 1.0427e-01, PNorm = 60.3459, GNorm = 0.6535, lr_0 = 4.8162e-04
Loss = 1.0469e-01, PNorm = 60.3592, GNorm = 0.5356, lr_0 = 4.8129e-04
Loss = 9.4852e-02, PNorm = 60.3756, GNorm = 0.7905, lr_0 = 4.8096e-04
Loss = 9.9494e-02, PNorm = 60.3893, GNorm = 1.0258, lr_0 = 4.8064e-04
Loss = 9.6087e-02, PNorm = 60.3991, GNorm = 1.2209, lr_0 = 4.8031e-04
Loss = 9.2962e-02, PNorm = 60.4046, GNorm = 1.1294, lr_0 = 4.7998e-04
Loss = 9.0260e-02, PNorm = 60.4134, GNorm = 0.6160, lr_0 = 4.7965e-04
Loss = 1.0827e-01, PNorm = 60.4234, GNorm = 0.8456, lr_0 = 4.7932e-04
Loss = 1.0905e-01, PNorm = 60.4362, GNorm = 0.7846, lr_0 = 4.7899e-04
Loss = 1.0666e-01, PNorm = 60.4417, GNorm = 1.3952, lr_0 = 4.7866e-04
Loss = 1.0523e-01, PNorm = 60.4565, GNorm = 0.6643, lr_0 = 4.7833e-04
Loss = 8.7413e-02, PNorm = 60.4693, GNorm = 0.6824, lr_0 = 4.7801e-04
Loss = 9.0918e-02, PNorm = 60.4810, GNorm = 0.5255, lr_0 = 4.7768e-04
Loss = 8.7382e-02, PNorm = 60.4893, GNorm = 0.6333, lr_0 = 4.7735e-04
Loss = 1.1251e-01, PNorm = 60.4982, GNorm = 1.2944, lr_0 = 4.7703e-04
Validation mae = 0.430731
Epoch 11
Loss = 8.0660e-02, PNorm = 60.5084, GNorm = 0.5849, lr_0 = 4.7670e-04
Loss = 6.4394e-02, PNorm = 60.5165, GNorm = 0.5944, lr_0 = 4.7637e-04
Loss = 8.1303e-02, PNorm = 60.5274, GNorm = 0.8042, lr_0 = 4.7605e-04
Loss = 8.5790e-02, PNorm = 60.5403, GNorm = 0.8149, lr_0 = 4.7572e-04
Loss = 8.5061e-02, PNorm = 60.5511, GNorm = 0.5259, lr_0 = 4.7539e-04
Loss = 8.2743e-02, PNorm = 60.5591, GNorm = 1.1837, lr_0 = 4.7507e-04
Loss = 9.1844e-02, PNorm = 60.5719, GNorm = 0.6116, lr_0 = 4.7474e-04
Loss = 8.2295e-02, PNorm = 60.5859, GNorm = 0.5433, lr_0 = 4.7442e-04
Loss = 9.9382e-02, PNorm = 60.5965, GNorm = 1.0694, lr_0 = 4.7409e-04
Loss = 8.9609e-02, PNorm = 60.6066, GNorm = 1.1089, lr_0 = 4.7377e-04
Loss = 9.8829e-02, PNorm = 60.6197, GNorm = 1.5536, lr_0 = 4.7344e-04
Loss = 7.7654e-02, PNorm = 60.6363, GNorm = 0.4610, lr_0 = 4.7312e-04
Loss = 1.0801e-01, PNorm = 60.6489, GNorm = 1.4813, lr_0 = 4.7279e-04
Loss = 8.8544e-02, PNorm = 60.6622, GNorm = 1.0036, lr_0 = 4.7247e-04
Loss = 9.0184e-02, PNorm = 60.6718, GNorm = 0.6963, lr_0 = 4.7215e-04
Loss = 9.1642e-02, PNorm = 60.6801, GNorm = 0.7982, lr_0 = 4.7182e-04
Loss = 9.0191e-02, PNorm = 60.6898, GNorm = 0.7651, lr_0 = 4.7150e-04
Loss = 8.7240e-02, PNorm = 60.7003, GNorm = 0.5112, lr_0 = 4.7118e-04
Loss = 8.6141e-02, PNorm = 60.7105, GNorm = 1.0839, lr_0 = 4.7085e-04
Loss = 8.0659e-02, PNorm = 60.7197, GNorm = 0.6768, lr_0 = 4.7053e-04
Loss = 8.9511e-02, PNorm = 60.7288, GNorm = 0.6686, lr_0 = 4.7021e-04
Loss = 8.8849e-02, PNorm = 60.7424, GNorm = 0.8182, lr_0 = 4.6989e-04
Loss = 8.9878e-02, PNorm = 60.7574, GNorm = 0.5695, lr_0 = 4.6957e-04
Loss = 8.4858e-02, PNorm = 60.7693, GNorm = 0.5404, lr_0 = 4.6924e-04
Loss = 8.4019e-02, PNorm = 60.7748, GNorm = 0.6279, lr_0 = 4.6892e-04
Loss = 8.3963e-02, PNorm = 60.7805, GNorm = 0.6135, lr_0 = 4.6860e-04
Loss = 9.4655e-02, PNorm = 60.7908, GNorm = 1.1527, lr_0 = 4.6828e-04
Loss = 7.1160e-02, PNorm = 60.8027, GNorm = 0.6441, lr_0 = 4.6796e-04
Loss = 8.8866e-02, PNorm = 60.8166, GNorm = 0.4953, lr_0 = 4.6764e-04
Loss = 9.2784e-02, PNorm = 60.8260, GNorm = 0.5697, lr_0 = 4.6732e-04
Loss = 8.7752e-02, PNorm = 60.8339, GNorm = 0.4154, lr_0 = 4.6700e-04
Loss = 8.0859e-02, PNorm = 60.8443, GNorm = 1.0404, lr_0 = 4.6668e-04
Loss = 9.0480e-02, PNorm = 60.8567, GNorm = 0.9899, lr_0 = 4.6636e-04
Loss = 8.8559e-02, PNorm = 60.8664, GNorm = 0.4884, lr_0 = 4.6604e-04
Loss = 8.3017e-02, PNorm = 60.8801, GNorm = 0.6164, lr_0 = 4.6572e-04
Loss = 9.5087e-02, PNorm = 60.8951, GNorm = 1.3312, lr_0 = 4.6540e-04
Loss = 8.9116e-02, PNorm = 60.9060, GNorm = 0.6868, lr_0 = 4.6508e-04
Loss = 8.3498e-02, PNorm = 60.9153, GNorm = 0.6603, lr_0 = 4.6476e-04
Loss = 7.9454e-02, PNorm = 60.9259, GNorm = 0.7984, lr_0 = 4.6445e-04
Loss = 8.9424e-02, PNorm = 60.9396, GNorm = 0.9201, lr_0 = 4.6413e-04
Loss = 8.6284e-02, PNorm = 60.9548, GNorm = 0.8832, lr_0 = 4.6381e-04
Loss = 7.2890e-02, PNorm = 60.9678, GNorm = 0.5630, lr_0 = 4.6349e-04
Loss = 7.5092e-02, PNorm = 60.9766, GNorm = 0.9349, lr_0 = 4.6317e-04
Loss = 8.9681e-02, PNorm = 60.9861, GNorm = 1.0915, lr_0 = 4.6286e-04
Loss = 8.8612e-02, PNorm = 60.9953, GNorm = 0.5519, lr_0 = 4.6254e-04
Loss = 8.4336e-02, PNorm = 61.0067, GNorm = 1.1358, lr_0 = 4.6222e-04
Loss = 7.9929e-02, PNorm = 61.0160, GNorm = 0.4709, lr_0 = 4.6191e-04
Loss = 8.0402e-02, PNorm = 61.0240, GNorm = 1.1190, lr_0 = 4.6159e-04
Loss = 9.6038e-02, PNorm = 61.0316, GNorm = 0.6302, lr_0 = 4.6127e-04
Loss = 7.5016e-02, PNorm = 61.0403, GNorm = 0.5686, lr_0 = 4.6096e-04
Loss = 8.7574e-02, PNorm = 61.0501, GNorm = 0.7498, lr_0 = 4.6064e-04
Loss = 8.0961e-02, PNorm = 61.0615, GNorm = 1.2063, lr_0 = 4.6033e-04
Loss = 9.2977e-02, PNorm = 61.0719, GNorm = 1.1233, lr_0 = 4.6001e-04
Loss = 8.9472e-02, PNorm = 61.0815, GNorm = 0.8497, lr_0 = 4.5970e-04
Loss = 9.1272e-02, PNorm = 61.0949, GNorm = 0.6513, lr_0 = 4.5938e-04
Loss = 7.7066e-02, PNorm = 61.1052, GNorm = 0.6770, lr_0 = 4.5907e-04
Loss = 9.4900e-02, PNorm = 61.1158, GNorm = 0.6894, lr_0 = 4.5875e-04
Loss = 1.0081e-01, PNorm = 61.1354, GNorm = 0.4703, lr_0 = 4.5844e-04
Loss = 9.2346e-02, PNorm = 61.1458, GNorm = 0.9363, lr_0 = 4.5812e-04
Loss = 1.0075e-01, PNorm = 61.1579, GNorm = 0.6416, lr_0 = 4.5781e-04
Loss = 8.4977e-02, PNorm = 61.1738, GNorm = 0.8117, lr_0 = 4.5750e-04
Loss = 9.1600e-02, PNorm = 61.1862, GNorm = 0.5592, lr_0 = 4.5718e-04
Loss = 1.0066e-01, PNorm = 61.1933, GNorm = 1.2395, lr_0 = 4.5687e-04
Loss = 8.9368e-02, PNorm = 61.2070, GNorm = 0.7578, lr_0 = 4.5656e-04
Loss = 8.3798e-02, PNorm = 61.2165, GNorm = 0.5782, lr_0 = 4.5624e-04
Loss = 9.2113e-02, PNorm = 61.2262, GNorm = 0.7048, lr_0 = 4.5593e-04
Loss = 8.6323e-02, PNorm = 61.2355, GNorm = 0.6988, lr_0 = 4.5562e-04
Loss = 1.0453e-01, PNorm = 61.2447, GNorm = 0.7758, lr_0 = 4.5531e-04
Loss = 7.7629e-02, PNorm = 61.2551, GNorm = 0.6339, lr_0 = 4.5499e-04
Loss = 8.1841e-02, PNorm = 61.2638, GNorm = 0.7091, lr_0 = 4.5468e-04
Loss = 8.2749e-02, PNorm = 61.2721, GNorm = 0.7622, lr_0 = 4.5437e-04
Loss = 8.3573e-02, PNorm = 61.2838, GNorm = 0.5277, lr_0 = 4.5406e-04
Loss = 9.1472e-02, PNorm = 61.2924, GNorm = 0.6524, lr_0 = 4.5375e-04
Loss = 9.7218e-02, PNorm = 61.3028, GNorm = 0.6090, lr_0 = 4.5344e-04
Loss = 9.5434e-02, PNorm = 61.3136, GNorm = 0.8671, lr_0 = 4.5313e-04
Loss = 9.1840e-02, PNorm = 61.3261, GNorm = 1.1780, lr_0 = 4.5282e-04
Loss = 8.7707e-02, PNorm = 61.3351, GNorm = 0.6333, lr_0 = 4.5251e-04
Loss = 9.0859e-02, PNorm = 61.3436, GNorm = 1.1317, lr_0 = 4.5220e-04
Loss = 1.0556e-01, PNorm = 61.3558, GNorm = 0.8187, lr_0 = 4.5189e-04
Loss = 7.9857e-02, PNorm = 61.3684, GNorm = 1.0300, lr_0 = 4.5158e-04
Loss = 8.5169e-02, PNorm = 61.3759, GNorm = 0.6588, lr_0 = 4.5127e-04
Loss = 8.2180e-02, PNorm = 61.3867, GNorm = 0.5533, lr_0 = 4.5096e-04
Loss = 1.0430e-01, PNorm = 61.3970, GNorm = 0.6657, lr_0 = 4.5065e-04
Loss = 7.8837e-02, PNorm = 61.4034, GNorm = 0.4679, lr_0 = 4.5034e-04
Loss = 9.4389e-02, PNorm = 61.4106, GNorm = 1.1459, lr_0 = 4.5003e-04
Loss = 1.0262e-01, PNorm = 61.4217, GNorm = 0.8223, lr_0 = 4.4972e-04
Loss = 8.2138e-02, PNorm = 61.4329, GNorm = 0.4817, lr_0 = 4.4942e-04
Loss = 8.4267e-02, PNorm = 61.4414, GNorm = 1.0475, lr_0 = 4.4911e-04
Loss = 8.3356e-02, PNorm = 61.4507, GNorm = 0.4687, lr_0 = 4.4880e-04
Loss = 9.9359e-02, PNorm = 61.4601, GNorm = 0.9236, lr_0 = 4.4849e-04
Loss = 8.5714e-02, PNorm = 61.4697, GNorm = 0.4527, lr_0 = 4.4819e-04
Loss = 8.2837e-02, PNorm = 61.4847, GNorm = 0.8083, lr_0 = 4.4788e-04
Loss = 8.7002e-02, PNorm = 61.4978, GNorm = 0.6417, lr_0 = 4.4757e-04
Loss = 7.8953e-02, PNorm = 61.5059, GNorm = 0.8541, lr_0 = 4.4727e-04
Loss = 7.8431e-02, PNorm = 61.5126, GNorm = 0.8945, lr_0 = 4.4696e-04
Loss = 9.0923e-02, PNorm = 61.5188, GNorm = 1.0183, lr_0 = 4.4665e-04
Loss = 8.4576e-02, PNorm = 61.5296, GNorm = 0.7232, lr_0 = 4.4635e-04
Loss = 9.4470e-02, PNorm = 61.5372, GNorm = 0.8967, lr_0 = 4.4604e-04
Loss = 9.0453e-02, PNorm = 61.5463, GNorm = 0.4321, lr_0 = 4.4574e-04
Loss = 8.9865e-02, PNorm = 61.5556, GNorm = 0.5197, lr_0 = 4.4543e-04
Loss = 8.5203e-02, PNorm = 61.5685, GNorm = 0.5566, lr_0 = 4.4513e-04
Loss = 7.2206e-02, PNorm = 61.5740, GNorm = 0.5846, lr_0 = 4.4482e-04
Loss = 8.7391e-02, PNorm = 61.5788, GNorm = 1.1133, lr_0 = 4.4452e-04
Loss = 8.7314e-02, PNorm = 61.5866, GNorm = 1.1231, lr_0 = 4.4421e-04
Loss = 9.4105e-02, PNorm = 61.5980, GNorm = 0.6060, lr_0 = 4.4391e-04
Loss = 9.4226e-02, PNorm = 61.6145, GNorm = 0.4080, lr_0 = 4.4360e-04
Loss = 1.0057e-01, PNorm = 61.6260, GNorm = 0.8568, lr_0 = 4.4330e-04
Loss = 9.6558e-02, PNorm = 61.6358, GNorm = 0.6740, lr_0 = 4.4299e-04
Loss = 9.5164e-02, PNorm = 61.6466, GNorm = 0.6635, lr_0 = 4.4269e-04
Loss = 9.6842e-02, PNorm = 61.6578, GNorm = 0.5325, lr_0 = 4.4239e-04
Loss = 9.7787e-02, PNorm = 61.6717, GNorm = 0.5779, lr_0 = 4.4209e-04
Loss = 8.0461e-02, PNorm = 61.6836, GNorm = 0.9303, lr_0 = 4.4178e-04
Loss = 9.3330e-02, PNorm = 61.6901, GNorm = 0.9953, lr_0 = 4.4148e-04
Loss = 1.1387e-01, PNorm = 61.6950, GNorm = 1.3541, lr_0 = 4.4118e-04
Loss = 1.0319e-01, PNorm = 61.7044, GNorm = 1.3422, lr_0 = 4.4088e-04
Loss = 1.0187e-01, PNorm = 61.7188, GNorm = 0.6224, lr_0 = 4.4057e-04
Loss = 9.5707e-02, PNorm = 61.7332, GNorm = 0.5892, lr_0 = 4.4027e-04
Loss = 9.2762e-02, PNorm = 61.7468, GNorm = 0.6110, lr_0 = 4.3997e-04
Loss = 8.1504e-02, PNorm = 61.7532, GNorm = 0.7337, lr_0 = 4.3967e-04
Loss = 8.4294e-02, PNorm = 61.7594, GNorm = 0.5696, lr_0 = 4.3937e-04
Validation mae = 0.403421
Epoch 12
Loss = 8.0172e-02, PNorm = 61.7664, GNorm = 0.4360, lr_0 = 4.3907e-04
Loss = 7.4937e-02, PNorm = 61.7780, GNorm = 0.4604, lr_0 = 4.3877e-04
Loss = 6.9463e-02, PNorm = 61.7835, GNorm = 0.5318, lr_0 = 4.3846e-04
Loss = 8.0264e-02, PNorm = 61.7926, GNorm = 0.6252, lr_0 = 4.3816e-04
Loss = 8.2708e-02, PNorm = 61.8065, GNorm = 0.6673, lr_0 = 4.3786e-04
Loss = 8.3195e-02, PNorm = 61.8162, GNorm = 0.5340, lr_0 = 4.3756e-04
Loss = 7.4844e-02, PNorm = 61.8253, GNorm = 0.4552, lr_0 = 4.3726e-04
Loss = 7.8661e-02, PNorm = 61.8355, GNorm = 0.5289, lr_0 = 4.3696e-04
Loss = 8.3658e-02, PNorm = 61.8456, GNorm = 1.0417, lr_0 = 4.3667e-04
Loss = 7.1398e-02, PNorm = 61.8580, GNorm = 0.5101, lr_0 = 4.3637e-04
Loss = 8.3555e-02, PNorm = 61.8712, GNorm = 0.5516, lr_0 = 4.3607e-04
Loss = 7.6202e-02, PNorm = 61.8834, GNorm = 0.5519, lr_0 = 4.3577e-04
Loss = 8.1826e-02, PNorm = 61.8960, GNorm = 0.7025, lr_0 = 4.3547e-04
Loss = 9.2614e-02, PNorm = 61.9069, GNorm = 0.7304, lr_0 = 4.3517e-04
Loss = 8.6405e-02, PNorm = 61.9192, GNorm = 0.9691, lr_0 = 4.3487e-04
Loss = 8.1072e-02, PNorm = 61.9291, GNorm = 0.5034, lr_0 = 4.3458e-04
Loss = 7.8420e-02, PNorm = 61.9376, GNorm = 0.7767, lr_0 = 4.3428e-04
Loss = 6.7398e-02, PNorm = 61.9473, GNorm = 0.5018, lr_0 = 4.3398e-04
Loss = 7.8729e-02, PNorm = 61.9587, GNorm = 0.4987, lr_0 = 4.3368e-04
Loss = 8.7018e-02, PNorm = 61.9724, GNorm = 0.5293, lr_0 = 4.3339e-04
Loss = 8.0051e-02, PNorm = 61.9851, GNorm = 0.8331, lr_0 = 4.3309e-04
Loss = 8.9952e-02, PNorm = 61.9951, GNorm = 1.4191, lr_0 = 4.3279e-04
Loss = 9.8306e-02, PNorm = 62.0103, GNorm = 0.7635, lr_0 = 4.3250e-04
Loss = 7.7067e-02, PNorm = 62.0239, GNorm = 0.7338, lr_0 = 4.3220e-04
Loss = 8.5661e-02, PNorm = 62.0325, GNorm = 1.1208, lr_0 = 4.3190e-04
Loss = 8.1473e-02, PNorm = 62.0456, GNorm = 0.8390, lr_0 = 4.3161e-04
Loss = 7.8639e-02, PNorm = 62.0551, GNorm = 0.8052, lr_0 = 4.3131e-04
Loss = 8.5542e-02, PNorm = 62.0607, GNorm = 1.1813, lr_0 = 4.3102e-04
Loss = 8.9969e-02, PNorm = 62.0681, GNorm = 0.8852, lr_0 = 4.3072e-04
Loss = 7.8713e-02, PNorm = 62.0794, GNorm = 0.4306, lr_0 = 4.3043e-04
Loss = 7.6086e-02, PNorm = 62.0890, GNorm = 0.6219, lr_0 = 4.3013e-04
Loss = 7.3078e-02, PNorm = 62.0989, GNorm = 0.6370, lr_0 = 4.2984e-04
Loss = 6.5843e-02, PNorm = 62.1085, GNorm = 0.8578, lr_0 = 4.2954e-04
Loss = 7.3664e-02, PNorm = 62.1186, GNorm = 0.4156, lr_0 = 4.2925e-04
Loss = 7.7177e-02, PNorm = 62.1282, GNorm = 0.5185, lr_0 = 4.2895e-04
Loss = 7.5745e-02, PNorm = 62.1350, GNorm = 0.3961, lr_0 = 4.2866e-04
Loss = 7.6358e-02, PNorm = 62.1419, GNorm = 0.8244, lr_0 = 4.2837e-04
Loss = 7.2978e-02, PNorm = 62.1476, GNorm = 1.1483, lr_0 = 4.2807e-04
Loss = 7.7333e-02, PNorm = 62.1579, GNorm = 0.6021, lr_0 = 4.2778e-04
Loss = 8.8721e-02, PNorm = 62.1701, GNorm = 0.5487, lr_0 = 4.2749e-04
Loss = 7.7897e-02, PNorm = 62.1808, GNorm = 0.6341, lr_0 = 4.2719e-04
Loss = 9.1672e-02, PNorm = 62.1870, GNorm = 0.7833, lr_0 = 4.2690e-04
Loss = 7.5766e-02, PNorm = 62.1953, GNorm = 0.5120, lr_0 = 4.2661e-04
Loss = 7.7164e-02, PNorm = 62.2023, GNorm = 0.5253, lr_0 = 4.2632e-04
Loss = 8.1626e-02, PNorm = 62.2109, GNorm = 0.4511, lr_0 = 4.2602e-04
Loss = 8.8822e-02, PNorm = 62.2209, GNorm = 0.8203, lr_0 = 4.2573e-04
Loss = 8.4881e-02, PNorm = 62.2268, GNorm = 0.5440, lr_0 = 4.2544e-04
Loss = 7.6858e-02, PNorm = 62.2309, GNorm = 0.7392, lr_0 = 4.2515e-04
Loss = 7.6848e-02, PNorm = 62.2406, GNorm = 0.5942, lr_0 = 4.2486e-04
Loss = 7.9929e-02, PNorm = 62.2507, GNorm = 0.5932, lr_0 = 4.2457e-04
Loss = 8.0055e-02, PNorm = 62.2617, GNorm = 0.5250, lr_0 = 4.2428e-04
Loss = 8.3077e-02, PNorm = 62.2692, GNorm = 1.0470, lr_0 = 4.2399e-04
Loss = 8.9466e-02, PNorm = 62.2812, GNorm = 0.6973, lr_0 = 4.2370e-04
Loss = 8.1678e-02, PNorm = 62.2875, GNorm = 0.6477, lr_0 = 4.2340e-04
Loss = 8.5273e-02, PNorm = 62.2937, GNorm = 0.8944, lr_0 = 4.2311e-04
Loss = 8.4210e-02, PNorm = 62.3016, GNorm = 1.4056, lr_0 = 4.2283e-04
Loss = 1.0022e-01, PNorm = 62.3126, GNorm = 0.6809, lr_0 = 4.2254e-04
Loss = 9.0851e-02, PNorm = 62.3254, GNorm = 0.5016, lr_0 = 4.2225e-04
Loss = 8.1326e-02, PNorm = 62.3378, GNorm = 0.6756, lr_0 = 4.2196e-04
Loss = 8.1202e-02, PNorm = 62.3491, GNorm = 0.6363, lr_0 = 4.2167e-04
Loss = 7.3784e-02, PNorm = 62.3582, GNorm = 1.2365, lr_0 = 4.2138e-04
Loss = 7.7340e-02, PNorm = 62.3656, GNorm = 0.6563, lr_0 = 4.2109e-04
Loss = 7.6017e-02, PNorm = 62.3753, GNorm = 0.6180, lr_0 = 4.2080e-04
Loss = 6.7641e-02, PNorm = 62.3837, GNorm = 0.6268, lr_0 = 4.2051e-04
Loss = 7.4684e-02, PNorm = 62.3907, GNorm = 0.7506, lr_0 = 4.2023e-04
Loss = 8.2109e-02, PNorm = 62.4009, GNorm = 0.6674, lr_0 = 4.1994e-04
Loss = 8.8622e-02, PNorm = 62.4111, GNorm = 0.7251, lr_0 = 4.1965e-04
Loss = 7.8551e-02, PNorm = 62.4216, GNorm = 0.6337, lr_0 = 4.1936e-04
Loss = 8.0448e-02, PNorm = 62.4325, GNorm = 0.6340, lr_0 = 4.1907e-04
Loss = 9.1783e-02, PNorm = 62.4417, GNorm = 0.6436, lr_0 = 4.1879e-04
Loss = 8.7886e-02, PNorm = 62.4504, GNorm = 0.8525, lr_0 = 4.1850e-04
Loss = 7.5976e-02, PNorm = 62.4617, GNorm = 0.5959, lr_0 = 4.1821e-04
Loss = 8.6425e-02, PNorm = 62.4685, GNorm = 0.6266, lr_0 = 4.1793e-04
Loss = 9.1344e-02, PNorm = 62.4726, GNorm = 0.5182, lr_0 = 4.1764e-04
Loss = 8.2460e-02, PNorm = 62.4806, GNorm = 0.6632, lr_0 = 4.1736e-04
Loss = 7.7308e-02, PNorm = 62.4891, GNorm = 0.6391, lr_0 = 4.1707e-04
Loss = 7.7707e-02, PNorm = 62.4975, GNorm = 0.8762, lr_0 = 4.1678e-04
Loss = 7.1060e-02, PNorm = 62.5005, GNorm = 0.6701, lr_0 = 4.1650e-04
Loss = 8.3743e-02, PNorm = 62.5064, GNorm = 1.0612, lr_0 = 4.1621e-04
Loss = 8.5623e-02, PNorm = 62.5152, GNorm = 0.6373, lr_0 = 4.1593e-04
Loss = 8.9566e-02, PNorm = 62.5217, GNorm = 0.9014, lr_0 = 4.1564e-04
Loss = 8.0467e-02, PNorm = 62.5325, GNorm = 0.6624, lr_0 = 4.1536e-04
Loss = 6.9112e-02, PNorm = 62.5426, GNorm = 0.4889, lr_0 = 4.1507e-04
Loss = 7.8748e-02, PNorm = 62.5509, GNorm = 0.6539, lr_0 = 4.1479e-04
Loss = 7.8669e-02, PNorm = 62.5578, GNorm = 0.6137, lr_0 = 4.1450e-04
Loss = 8.3288e-02, PNorm = 62.5660, GNorm = 0.4960, lr_0 = 4.1422e-04
Loss = 6.8833e-02, PNorm = 62.5759, GNorm = 0.4948, lr_0 = 4.1394e-04
Loss = 7.2417e-02, PNorm = 62.5862, GNorm = 0.5989, lr_0 = 4.1365e-04
Loss = 7.0395e-02, PNorm = 62.5965, GNorm = 0.5812, lr_0 = 4.1337e-04
Loss = 7.8078e-02, PNorm = 62.6050, GNorm = 0.3921, lr_0 = 4.1309e-04
Loss = 7.7974e-02, PNorm = 62.6123, GNorm = 0.8607, lr_0 = 4.1280e-04
Loss = 9.5958e-02, PNorm = 62.6218, GNorm = 1.5630, lr_0 = 4.1252e-04
Loss = 9.4255e-02, PNorm = 62.6314, GNorm = 0.5543, lr_0 = 4.1224e-04
Loss = 7.4004e-02, PNorm = 62.6383, GNorm = 0.6458, lr_0 = 4.1196e-04
Loss = 8.9723e-02, PNorm = 62.6458, GNorm = 0.7337, lr_0 = 4.1167e-04
Loss = 9.7137e-02, PNorm = 62.6553, GNorm = 1.0850, lr_0 = 4.1139e-04
Loss = 7.9425e-02, PNorm = 62.6651, GNorm = 0.5684, lr_0 = 4.1111e-04
Loss = 9.2056e-02, PNorm = 62.6736, GNorm = 0.7240, lr_0 = 4.1083e-04
Loss = 8.3290e-02, PNorm = 62.6837, GNorm = 0.6422, lr_0 = 4.1055e-04
Loss = 8.2035e-02, PNorm = 62.6907, GNorm = 1.0545, lr_0 = 4.1027e-04
Loss = 7.8455e-02, PNorm = 62.7008, GNorm = 1.2873, lr_0 = 4.0998e-04
Loss = 8.3788e-02, PNorm = 62.7136, GNorm = 0.5815, lr_0 = 4.0970e-04
Loss = 8.5006e-02, PNorm = 62.7262, GNorm = 0.9024, lr_0 = 4.0942e-04
Loss = 8.2889e-02, PNorm = 62.7368, GNorm = 0.6286, lr_0 = 4.0914e-04
Loss = 8.8197e-02, PNorm = 62.7443, GNorm = 0.7643, lr_0 = 4.0886e-04
Loss = 8.9879e-02, PNorm = 62.7515, GNorm = 0.9156, lr_0 = 4.0858e-04
Loss = 9.3097e-02, PNorm = 62.7596, GNorm = 0.6698, lr_0 = 4.0830e-04
Loss = 6.9461e-02, PNorm = 62.7687, GNorm = 0.6349, lr_0 = 4.0802e-04
Loss = 7.9097e-02, PNorm = 62.7759, GNorm = 0.4569, lr_0 = 4.0774e-04
Loss = 9.0802e-02, PNorm = 62.7856, GNorm = 0.6340, lr_0 = 4.0746e-04
Loss = 8.3961e-02, PNorm = 62.7941, GNorm = 1.2791, lr_0 = 4.0718e-04
Loss = 8.2103e-02, PNorm = 62.7999, GNorm = 0.3643, lr_0 = 4.0691e-04
Loss = 8.6391e-02, PNorm = 62.8069, GNorm = 0.5199, lr_0 = 4.0663e-04
Loss = 8.4831e-02, PNorm = 62.8164, GNorm = 1.0044, lr_0 = 4.0635e-04
Loss = 8.8016e-02, PNorm = 62.8263, GNorm = 0.7470, lr_0 = 4.0607e-04
Loss = 8.6051e-02, PNorm = 62.8343, GNorm = 0.5844, lr_0 = 4.0579e-04
Loss = 9.0766e-02, PNorm = 62.8428, GNorm = 0.9488, lr_0 = 4.0551e-04
Loss = 8.3662e-02, PNorm = 62.8500, GNorm = 0.6561, lr_0 = 4.0524e-04
Loss = 8.5328e-02, PNorm = 62.8569, GNorm = 0.7648, lr_0 = 4.0496e-04
Loss = 8.4532e-02, PNorm = 62.8625, GNorm = 0.7910, lr_0 = 4.0468e-04
Validation mae = 0.393776
Epoch 13
Loss = 7.3864e-02, PNorm = 62.8715, GNorm = 0.5736, lr_0 = 4.0440e-04
Loss = 9.7052e-02, PNorm = 62.8824, GNorm = 0.6657, lr_0 = 4.0413e-04
Loss = 6.7943e-02, PNorm = 62.8891, GNorm = 0.4723, lr_0 = 4.0385e-04
Loss = 7.1448e-02, PNorm = 62.8999, GNorm = 0.6900, lr_0 = 4.0357e-04
Loss = 8.0589e-02, PNorm = 62.9126, GNorm = 0.5878, lr_0 = 4.0330e-04
Loss = 6.2746e-02, PNorm = 62.9217, GNorm = 0.5047, lr_0 = 4.0302e-04
Loss = 8.1850e-02, PNorm = 62.9289, GNorm = 1.1464, lr_0 = 4.0274e-04
Loss = 8.1236e-02, PNorm = 62.9393, GNorm = 0.9369, lr_0 = 4.0247e-04
Loss = 8.1191e-02, PNorm = 62.9501, GNorm = 0.4443, lr_0 = 4.0219e-04
Loss = 6.2553e-02, PNorm = 62.9593, GNorm = 0.6494, lr_0 = 4.0192e-04
Loss = 7.2475e-02, PNorm = 62.9662, GNorm = 0.4888, lr_0 = 4.0164e-04
Loss = 6.3431e-02, PNorm = 62.9761, GNorm = 0.4268, lr_0 = 4.0137e-04
Loss = 7.0747e-02, PNorm = 62.9836, GNorm = 0.9709, lr_0 = 4.0109e-04
Loss = 6.9722e-02, PNorm = 62.9911, GNorm = 0.9242, lr_0 = 4.0082e-04
Loss = 6.6354e-02, PNorm = 63.0012, GNorm = 1.0005, lr_0 = 4.0054e-04
Loss = 6.8055e-02, PNorm = 63.0108, GNorm = 0.5437, lr_0 = 4.0027e-04
Loss = 6.6062e-02, PNorm = 63.0161, GNorm = 0.8749, lr_0 = 3.9999e-04
Loss = 6.9090e-02, PNorm = 63.0213, GNorm = 0.5254, lr_0 = 3.9972e-04
Loss = 6.9764e-02, PNorm = 63.0286, GNorm = 0.5888, lr_0 = 3.9945e-04
Loss = 6.4564e-02, PNorm = 63.0378, GNorm = 0.6675, lr_0 = 3.9917e-04
Loss = 6.9743e-02, PNorm = 63.0468, GNorm = 0.4786, lr_0 = 3.9890e-04
Loss = 7.3591e-02, PNorm = 63.0595, GNorm = 0.5475, lr_0 = 3.9863e-04
Loss = 7.8828e-02, PNorm = 63.0676, GNorm = 0.5610, lr_0 = 3.9835e-04
Loss = 7.7359e-02, PNorm = 63.0748, GNorm = 0.8167, lr_0 = 3.9808e-04
Loss = 7.6434e-02, PNorm = 63.0860, GNorm = 0.6113, lr_0 = 3.9781e-04
Loss = 7.9905e-02, PNorm = 63.0950, GNorm = 0.7441, lr_0 = 3.9753e-04
Loss = 6.6472e-02, PNorm = 63.1042, GNorm = 0.8455, lr_0 = 3.9726e-04
Loss = 6.9485e-02, PNorm = 63.1111, GNorm = 0.5832, lr_0 = 3.9699e-04
Loss = 7.6582e-02, PNorm = 63.1171, GNorm = 0.9788, lr_0 = 3.9672e-04
Loss = 7.7798e-02, PNorm = 63.1246, GNorm = 0.9610, lr_0 = 3.9645e-04
Loss = 8.1622e-02, PNorm = 63.1347, GNorm = 0.7853, lr_0 = 3.9617e-04
Loss = 8.3865e-02, PNorm = 63.1468, GNorm = 0.6934, lr_0 = 3.9590e-04
Loss = 1.0025e-01, PNorm = 63.1636, GNorm = 0.7346, lr_0 = 3.9563e-04
Loss = 7.2824e-02, PNorm = 63.1815, GNorm = 0.6934, lr_0 = 3.9536e-04
Loss = 6.8858e-02, PNorm = 63.1956, GNorm = 0.4992, lr_0 = 3.9509e-04
Loss = 8.2602e-02, PNorm = 63.2019, GNorm = 0.6986, lr_0 = 3.9482e-04
Loss = 1.0071e-01, PNorm = 63.2140, GNorm = 0.7310, lr_0 = 3.9455e-04
Loss = 7.3415e-02, PNorm = 63.2268, GNorm = 0.6992, lr_0 = 3.9428e-04
Loss = 8.0432e-02, PNorm = 63.2352, GNorm = 0.6319, lr_0 = 3.9401e-04
Loss = 9.0658e-02, PNorm = 63.2441, GNorm = 0.6246, lr_0 = 3.9374e-04
Loss = 8.7597e-02, PNorm = 63.2564, GNorm = 0.6444, lr_0 = 3.9347e-04
Loss = 6.9037e-02, PNorm = 63.2690, GNorm = 0.4415, lr_0 = 3.9320e-04
Loss = 7.1811e-02, PNorm = 63.2775, GNorm = 0.5309, lr_0 = 3.9293e-04
Loss = 7.5938e-02, PNorm = 63.2840, GNorm = 0.4830, lr_0 = 3.9266e-04
Loss = 7.6977e-02, PNorm = 63.2913, GNorm = 0.6163, lr_0 = 3.9239e-04
Loss = 5.9961e-02, PNorm = 63.2992, GNorm = 0.4247, lr_0 = 3.9212e-04
Loss = 6.8499e-02, PNorm = 63.3062, GNorm = 0.5522, lr_0 = 3.9185e-04
Loss = 7.9255e-02, PNorm = 63.3106, GNorm = 0.7048, lr_0 = 3.9159e-04
Loss = 7.9839e-02, PNorm = 63.3165, GNorm = 0.6635, lr_0 = 3.9132e-04
Loss = 6.2523e-02, PNorm = 63.3265, GNorm = 0.6884, lr_0 = 3.9105e-04
Loss = 6.1658e-02, PNorm = 63.3319, GNorm = 0.4790, lr_0 = 3.9078e-04
Loss = 6.5697e-02, PNorm = 63.3404, GNorm = 0.6449, lr_0 = 3.9051e-04
Loss = 7.7025e-02, PNorm = 63.3521, GNorm = 1.0024, lr_0 = 3.9025e-04
Loss = 7.7961e-02, PNorm = 63.3579, GNorm = 0.5040, lr_0 = 3.8998e-04
Loss = 8.6484e-02, PNorm = 63.3659, GNorm = 0.4910, lr_0 = 3.8971e-04
Loss = 8.5639e-02, PNorm = 63.3795, GNorm = 0.6863, lr_0 = 3.8945e-04
Loss = 7.4055e-02, PNorm = 63.3931, GNorm = 0.4717, lr_0 = 3.8918e-04
Loss = 8.3803e-02, PNorm = 63.3997, GNorm = 1.3903, lr_0 = 3.8891e-04
Loss = 7.7842e-02, PNorm = 63.4091, GNorm = 0.6132, lr_0 = 3.8865e-04
Loss = 7.1552e-02, PNorm = 63.4173, GNorm = 0.5852, lr_0 = 3.8838e-04
Loss = 8.1170e-02, PNorm = 63.4240, GNorm = 0.5462, lr_0 = 3.8811e-04
Loss = 7.1768e-02, PNorm = 63.4315, GNorm = 0.5965, lr_0 = 3.8785e-04
Loss = 7.7280e-02, PNorm = 63.4383, GNorm = 1.0554, lr_0 = 3.8758e-04
Loss = 8.9338e-02, PNorm = 63.4471, GNorm = 0.7612, lr_0 = 3.8732e-04
Loss = 7.4542e-02, PNorm = 63.4565, GNorm = 0.5973, lr_0 = 3.8705e-04
Loss = 8.3152e-02, PNorm = 63.4656, GNorm = 0.8323, lr_0 = 3.8679e-04
Loss = 7.9003e-02, PNorm = 63.4722, GNorm = 0.6442, lr_0 = 3.8652e-04
Loss = 7.8522e-02, PNorm = 63.4824, GNorm = 0.8894, lr_0 = 3.8626e-04
Loss = 8.3108e-02, PNorm = 63.4946, GNorm = 1.1735, lr_0 = 3.8599e-04
Loss = 7.1327e-02, PNorm = 63.5072, GNorm = 0.5129, lr_0 = 3.8573e-04
Loss = 8.1726e-02, PNorm = 63.5170, GNorm = 0.8101, lr_0 = 3.8546e-04
Loss = 8.2894e-02, PNorm = 63.5238, GNorm = 0.5879, lr_0 = 3.8520e-04
Loss = 7.8436e-02, PNorm = 63.5291, GNorm = 0.7683, lr_0 = 3.8493e-04
Loss = 7.8479e-02, PNorm = 63.5388, GNorm = 0.7297, lr_0 = 3.8467e-04
Loss = 5.9530e-02, PNorm = 63.5472, GNorm = 0.5522, lr_0 = 3.8441e-04
Loss = 8.9831e-02, PNorm = 63.5514, GNorm = 0.6232, lr_0 = 3.8414e-04
Loss = 7.7531e-02, PNorm = 63.5576, GNorm = 0.5628, lr_0 = 3.8388e-04
Loss = 8.5983e-02, PNorm = 63.5669, GNorm = 1.0592, lr_0 = 3.8362e-04
Loss = 7.0770e-02, PNorm = 63.5770, GNorm = 0.4841, lr_0 = 3.8336e-04
Loss = 7.9524e-02, PNorm = 63.5824, GNorm = 0.7071, lr_0 = 3.8309e-04
Loss = 6.8942e-02, PNorm = 63.5900, GNorm = 0.6148, lr_0 = 3.8283e-04
Loss = 7.7280e-02, PNorm = 63.5980, GNorm = 0.7655, lr_0 = 3.8257e-04
Loss = 7.9689e-02, PNorm = 63.6028, GNorm = 0.8198, lr_0 = 3.8231e-04
Loss = 7.3520e-02, PNorm = 63.6077, GNorm = 0.6593, lr_0 = 3.8204e-04
Loss = 6.6519e-02, PNorm = 63.6182, GNorm = 0.5493, lr_0 = 3.8178e-04
Loss = 8.9528e-02, PNorm = 63.6255, GNorm = 0.4948, lr_0 = 3.8152e-04
Loss = 8.0582e-02, PNorm = 63.6329, GNorm = 0.4847, lr_0 = 3.8126e-04
Loss = 7.9537e-02, PNorm = 63.6424, GNorm = 0.9772, lr_0 = 3.8100e-04
Loss = 9.0322e-02, PNorm = 63.6512, GNorm = 1.0336, lr_0 = 3.8074e-04
Loss = 6.5192e-02, PNorm = 63.6576, GNorm = 0.5742, lr_0 = 3.8048e-04
Loss = 7.1881e-02, PNorm = 63.6632, GNorm = 0.6527, lr_0 = 3.8022e-04
Loss = 7.5252e-02, PNorm = 63.6688, GNorm = 1.1713, lr_0 = 3.7995e-04
Loss = 7.6672e-02, PNorm = 63.6770, GNorm = 0.3764, lr_0 = 3.7969e-04
Loss = 7.1384e-02, PNorm = 63.6859, GNorm = 0.6678, lr_0 = 3.7943e-04
Loss = 8.3579e-02, PNorm = 63.6964, GNorm = 0.6877, lr_0 = 3.7917e-04
Loss = 7.8944e-02, PNorm = 63.7045, GNorm = 0.9178, lr_0 = 3.7891e-04
Loss = 8.0027e-02, PNorm = 63.7130, GNorm = 0.6338, lr_0 = 3.7866e-04
Loss = 7.3480e-02, PNorm = 63.7198, GNorm = 0.5406, lr_0 = 3.7840e-04
Loss = 7.7261e-02, PNorm = 63.7246, GNorm = 0.6381, lr_0 = 3.7814e-04
Loss = 7.8932e-02, PNorm = 63.7338, GNorm = 0.4825, lr_0 = 3.7788e-04
Loss = 7.3189e-02, PNorm = 63.7440, GNorm = 0.6775, lr_0 = 3.7762e-04
Loss = 7.4442e-02, PNorm = 63.7514, GNorm = 0.9284, lr_0 = 3.7736e-04
Loss = 8.3938e-02, PNorm = 63.7581, GNorm = 0.5325, lr_0 = 3.7710e-04
Loss = 7.0149e-02, PNorm = 63.7668, GNorm = 0.5681, lr_0 = 3.7684e-04
Loss = 7.0379e-02, PNorm = 63.7758, GNorm = 0.5232, lr_0 = 3.7659e-04
Loss = 6.0910e-02, PNorm = 63.7845, GNorm = 0.5910, lr_0 = 3.7633e-04
Loss = 7.8118e-02, PNorm = 63.7908, GNorm = 0.6343, lr_0 = 3.7607e-04
Loss = 6.7765e-02, PNorm = 63.7955, GNorm = 0.5839, lr_0 = 3.7581e-04
Loss = 7.6137e-02, PNorm = 63.8026, GNorm = 0.6128, lr_0 = 3.7555e-04
Loss = 7.9131e-02, PNorm = 63.8077, GNorm = 0.4255, lr_0 = 3.7530e-04
Loss = 8.5344e-02, PNorm = 63.8113, GNorm = 0.4867, lr_0 = 3.7504e-04
Loss = 7.0994e-02, PNorm = 63.8195, GNorm = 0.4640, lr_0 = 3.7478e-04
Loss = 9.2498e-02, PNorm = 63.8284, GNorm = 0.6749, lr_0 = 3.7453e-04
Loss = 6.8755e-02, PNorm = 63.8366, GNorm = 0.5484, lr_0 = 3.7427e-04
Loss = 8.6213e-02, PNorm = 63.8440, GNorm = 0.7949, lr_0 = 3.7401e-04
Loss = 8.1718e-02, PNorm = 63.8518, GNorm = 0.5796, lr_0 = 3.7376e-04
Loss = 8.0988e-02, PNorm = 63.8592, GNorm = 0.6348, lr_0 = 3.7350e-04
Loss = 7.9106e-02, PNorm = 63.8650, GNorm = 1.1462, lr_0 = 3.7325e-04
Loss = 8.2160e-02, PNorm = 63.8742, GNorm = 0.7277, lr_0 = 3.7299e-04
Loss = 8.4412e-02, PNorm = 63.8823, GNorm = 1.0135, lr_0 = 3.7273e-04
Validation mae = 0.399961
Epoch 14
Loss = 6.4602e-02, PNorm = 63.8918, GNorm = 1.0611, lr_0 = 3.7248e-04
Loss = 8.4005e-02, PNorm = 63.9022, GNorm = 0.7287, lr_0 = 3.7222e-04
Loss = 6.3339e-02, PNorm = 63.9117, GNorm = 0.9497, lr_0 = 3.7197e-04
Loss = 6.7303e-02, PNorm = 63.9176, GNorm = 0.4243, lr_0 = 3.7171e-04
Loss = 6.2109e-02, PNorm = 63.9240, GNorm = 0.5461, lr_0 = 3.7146e-04
Loss = 6.8972e-02, PNorm = 63.9338, GNorm = 0.9869, lr_0 = 3.7120e-04
Loss = 6.2235e-02, PNorm = 63.9397, GNorm = 0.5983, lr_0 = 3.7095e-04
Loss = 6.0257e-02, PNorm = 63.9484, GNorm = 0.8779, lr_0 = 3.7070e-04
Loss = 8.0189e-02, PNorm = 63.9602, GNorm = 0.7292, lr_0 = 3.7044e-04
Loss = 8.0234e-02, PNorm = 63.9713, GNorm = 0.9331, lr_0 = 3.7019e-04
Loss = 6.8082e-02, PNorm = 63.9793, GNorm = 0.5967, lr_0 = 3.6993e-04
Loss = 7.2048e-02, PNorm = 63.9876, GNorm = 0.7705, lr_0 = 3.6968e-04
Loss = 6.7746e-02, PNorm = 63.9984, GNorm = 0.5861, lr_0 = 3.6943e-04
Loss = 7.8894e-02, PNorm = 64.0088, GNorm = 0.8114, lr_0 = 3.6917e-04
Loss = 6.3080e-02, PNorm = 64.0199, GNorm = 0.8773, lr_0 = 3.6892e-04
Loss = 6.0618e-02, PNorm = 64.0268, GNorm = 0.6677, lr_0 = 3.6867e-04
Loss = 7.5106e-02, PNorm = 64.0364, GNorm = 0.5656, lr_0 = 3.6842e-04
Loss = 7.5665e-02, PNorm = 64.0445, GNorm = 0.6122, lr_0 = 3.6816e-04
Loss = 8.4152e-02, PNorm = 64.0534, GNorm = 0.8658, lr_0 = 3.6791e-04
Loss = 6.8250e-02, PNorm = 64.0607, GNorm = 0.6075, lr_0 = 3.6766e-04
Loss = 6.7221e-02, PNorm = 64.0687, GNorm = 0.6160, lr_0 = 3.6741e-04
Loss = 8.5969e-02, PNorm = 64.0803, GNorm = 0.8591, lr_0 = 3.6716e-04
Loss = 7.2938e-02, PNorm = 64.0891, GNorm = 0.9583, lr_0 = 3.6690e-04
Loss = 7.7635e-02, PNorm = 64.0957, GNorm = 0.5706, lr_0 = 3.6665e-04
Loss = 7.0012e-02, PNorm = 64.1024, GNorm = 0.6038, lr_0 = 3.6640e-04
Loss = 6.7710e-02, PNorm = 64.1112, GNorm = 0.8673, lr_0 = 3.6615e-04
Loss = 6.7467e-02, PNorm = 64.1157, GNorm = 0.6384, lr_0 = 3.6590e-04
Loss = 6.5976e-02, PNorm = 64.1229, GNorm = 0.7062, lr_0 = 3.6565e-04
Loss = 7.8638e-02, PNorm = 64.1321, GNorm = 0.4955, lr_0 = 3.6540e-04
Loss = 7.5983e-02, PNorm = 64.1405, GNorm = 0.6136, lr_0 = 3.6515e-04
Loss = 7.4473e-02, PNorm = 64.1486, GNorm = 0.6620, lr_0 = 3.6490e-04
Loss = 7.4492e-02, PNorm = 64.1563, GNorm = 0.5388, lr_0 = 3.6465e-04
Loss = 7.9998e-02, PNorm = 64.1666, GNorm = 1.1533, lr_0 = 3.6440e-04
Loss = 7.1873e-02, PNorm = 64.1770, GNorm = 0.4515, lr_0 = 3.6415e-04
Loss = 7.2503e-02, PNorm = 64.1861, GNorm = 0.4883, lr_0 = 3.6390e-04
Loss = 6.3559e-02, PNorm = 64.1912, GNorm = 0.7277, lr_0 = 3.6365e-04
Loss = 5.8126e-02, PNorm = 64.1992, GNorm = 0.5127, lr_0 = 3.6340e-04
Loss = 7.2908e-02, PNorm = 64.2094, GNorm = 0.6491, lr_0 = 3.6315e-04
Loss = 6.7932e-02, PNorm = 64.2158, GNorm = 0.7464, lr_0 = 3.6290e-04
Loss = 7.7048e-02, PNorm = 64.2204, GNorm = 0.6874, lr_0 = 3.6266e-04
Loss = 6.8310e-02, PNorm = 64.2271, GNorm = 0.5915, lr_0 = 3.6241e-04
Loss = 7.9359e-02, PNorm = 64.2352, GNorm = 0.5540, lr_0 = 3.6216e-04
Loss = 7.3132e-02, PNorm = 64.2395, GNorm = 0.6948, lr_0 = 3.6191e-04
Loss = 6.5690e-02, PNorm = 64.2463, GNorm = 0.4959, lr_0 = 3.6166e-04
Loss = 6.9898e-02, PNorm = 64.2534, GNorm = 0.6666, lr_0 = 3.6141e-04
Loss = 7.1523e-02, PNorm = 64.2604, GNorm = 0.7159, lr_0 = 3.6117e-04
Loss = 6.2708e-02, PNorm = 64.2680, GNorm = 0.6402, lr_0 = 3.6092e-04
Loss = 7.0171e-02, PNorm = 64.2747, GNorm = 0.4748, lr_0 = 3.6067e-04
Loss = 6.6456e-02, PNorm = 64.2793, GNorm = 0.4840, lr_0 = 3.6043e-04
Loss = 7.3315e-02, PNorm = 64.2881, GNorm = 0.6485, lr_0 = 3.6018e-04
Loss = 6.9426e-02, PNorm = 64.2988, GNorm = 0.5032, lr_0 = 3.5993e-04
Loss = 6.9356e-02, PNorm = 64.3093, GNorm = 0.5637, lr_0 = 3.5969e-04
Loss = 8.0019e-02, PNorm = 64.3141, GNorm = 0.5572, lr_0 = 3.5944e-04
Loss = 6.3209e-02, PNorm = 64.3224, GNorm = 0.7045, lr_0 = 3.5919e-04
Loss = 7.8048e-02, PNorm = 64.3299, GNorm = 0.5989, lr_0 = 3.5895e-04
Loss = 7.5324e-02, PNorm = 64.3393, GNorm = 0.9885, lr_0 = 3.5870e-04
Loss = 7.8811e-02, PNorm = 64.3477, GNorm = 1.1653, lr_0 = 3.5845e-04
Loss = 7.2876e-02, PNorm = 64.3550, GNorm = 0.8797, lr_0 = 3.5821e-04
Loss = 6.4040e-02, PNorm = 64.3637, GNorm = 0.8733, lr_0 = 3.5796e-04
Loss = 6.4154e-02, PNorm = 64.3729, GNorm = 0.8572, lr_0 = 3.5772e-04
Loss = 7.5025e-02, PNorm = 64.3773, GNorm = 0.6842, lr_0 = 3.5747e-04
Loss = 7.4781e-02, PNorm = 64.3849, GNorm = 0.8363, lr_0 = 3.5723e-04
Loss = 6.6144e-02, PNorm = 64.3954, GNorm = 0.8257, lr_0 = 3.5698e-04
Loss = 7.4173e-02, PNorm = 64.4039, GNorm = 1.0925, lr_0 = 3.5674e-04
Loss = 7.1628e-02, PNorm = 64.4102, GNorm = 0.6317, lr_0 = 3.5650e-04
Loss = 8.1000e-02, PNorm = 64.4180, GNorm = 0.5145, lr_0 = 3.5625e-04
Loss = 6.7366e-02, PNorm = 64.4255, GNorm = 0.5443, lr_0 = 3.5601e-04
Loss = 7.6569e-02, PNorm = 64.4324, GNorm = 0.6294, lr_0 = 3.5576e-04
Loss = 7.0772e-02, PNorm = 64.4404, GNorm = 0.5550, lr_0 = 3.5552e-04
Loss = 6.6537e-02, PNorm = 64.4458, GNorm = 0.5904, lr_0 = 3.5528e-04
Loss = 7.4816e-02, PNorm = 64.4514, GNorm = 1.1171, lr_0 = 3.5503e-04
Loss = 5.8204e-02, PNorm = 64.4576, GNorm = 0.6189, lr_0 = 3.5479e-04
Loss = 6.0550e-02, PNorm = 64.4649, GNorm = 0.5369, lr_0 = 3.5455e-04
Loss = 7.1698e-02, PNorm = 64.4708, GNorm = 0.6626, lr_0 = 3.5430e-04
Loss = 6.1204e-02, PNorm = 64.4742, GNorm = 0.6491, lr_0 = 3.5406e-04
Loss = 7.4635e-02, PNorm = 64.4811, GNorm = 0.7052, lr_0 = 3.5382e-04
Loss = 7.0332e-02, PNorm = 64.4881, GNorm = 0.6424, lr_0 = 3.5358e-04
Loss = 7.5472e-02, PNorm = 64.4922, GNorm = 0.9074, lr_0 = 3.5333e-04
Loss = 7.2577e-02, PNorm = 64.4983, GNorm = 0.6242, lr_0 = 3.5309e-04
Loss = 6.9055e-02, PNorm = 64.5040, GNorm = 1.0716, lr_0 = 3.5285e-04
Loss = 6.9704e-02, PNorm = 64.5116, GNorm = 0.5676, lr_0 = 3.5261e-04
Loss = 7.6370e-02, PNorm = 64.5147, GNorm = 0.5910, lr_0 = 3.5237e-04
Loss = 5.7133e-02, PNorm = 64.5177, GNorm = 0.4029, lr_0 = 3.5212e-04
Loss = 7.4315e-02, PNorm = 64.5215, GNorm = 1.0946, lr_0 = 3.5188e-04
Loss = 6.8849e-02, PNorm = 64.5248, GNorm = 0.6362, lr_0 = 3.5164e-04
Loss = 7.4419e-02, PNorm = 64.5306, GNorm = 0.7697, lr_0 = 3.5140e-04
Loss = 6.5611e-02, PNorm = 64.5363, GNorm = 0.3889, lr_0 = 3.5116e-04
Loss = 6.6053e-02, PNorm = 64.5401, GNorm = 0.6010, lr_0 = 3.5092e-04
Loss = 7.8796e-02, PNorm = 64.5481, GNorm = 0.5577, lr_0 = 3.5068e-04
Loss = 6.2961e-02, PNorm = 64.5580, GNorm = 0.5754, lr_0 = 3.5044e-04
Loss = 7.2752e-02, PNorm = 64.5624, GNorm = 0.5540, lr_0 = 3.5020e-04
Loss = 8.1433e-02, PNorm = 64.5696, GNorm = 0.7126, lr_0 = 3.4996e-04
Loss = 6.6636e-02, PNorm = 64.5762, GNorm = 0.7716, lr_0 = 3.4972e-04
Loss = 7.5453e-02, PNorm = 64.5843, GNorm = 1.0384, lr_0 = 3.4948e-04
Loss = 6.1682e-02, PNorm = 64.5911, GNorm = 0.6282, lr_0 = 3.4924e-04
Loss = 7.5154e-02, PNorm = 64.5976, GNorm = 0.6456, lr_0 = 3.4900e-04
Loss = 7.0417e-02, PNorm = 64.6021, GNorm = 0.6219, lr_0 = 3.4876e-04
Loss = 8.2813e-02, PNorm = 64.6068, GNorm = 0.5305, lr_0 = 3.4852e-04
Loss = 7.0041e-02, PNorm = 64.6177, GNorm = 0.6882, lr_0 = 3.4828e-04
Loss = 6.6978e-02, PNorm = 64.6258, GNorm = 0.6538, lr_0 = 3.4805e-04
Loss = 7.3396e-02, PNorm = 64.6315, GNorm = 0.5436, lr_0 = 3.4781e-04
Loss = 7.0426e-02, PNorm = 64.6359, GNorm = 0.4986, lr_0 = 3.4757e-04
Loss = 6.1918e-02, PNorm = 64.6434, GNorm = 0.5277, lr_0 = 3.4733e-04
Loss = 6.8722e-02, PNorm = 64.6529, GNorm = 0.6861, lr_0 = 3.4709e-04
Loss = 7.3997e-02, PNorm = 64.6598, GNorm = 0.6772, lr_0 = 3.4686e-04
Loss = 6.5982e-02, PNorm = 64.6661, GNorm = 0.7243, lr_0 = 3.4662e-04
Loss = 7.7606e-02, PNorm = 64.6722, GNorm = 0.6193, lr_0 = 3.4638e-04
Loss = 6.8709e-02, PNorm = 64.6809, GNorm = 0.6175, lr_0 = 3.4614e-04
Loss = 6.9597e-02, PNorm = 64.6883, GNorm = 0.4427, lr_0 = 3.4591e-04
Loss = 7.2762e-02, PNorm = 64.6939, GNorm = 0.4736, lr_0 = 3.4567e-04
Loss = 7.2129e-02, PNorm = 64.7036, GNorm = 0.6988, lr_0 = 3.4543e-04
Loss = 6.8466e-02, PNorm = 64.7125, GNorm = 0.6142, lr_0 = 3.4520e-04
Loss = 6.9069e-02, PNorm = 64.7182, GNorm = 1.0769, lr_0 = 3.4496e-04
Loss = 6.7703e-02, PNorm = 64.7250, GNorm = 0.9437, lr_0 = 3.4472e-04
Loss = 5.8003e-02, PNorm = 64.7316, GNorm = 0.6360, lr_0 = 3.4449e-04
Loss = 7.9842e-02, PNorm = 64.7379, GNorm = 1.3931, lr_0 = 3.4425e-04
Loss = 8.2416e-02, PNorm = 64.7490, GNorm = 0.8016, lr_0 = 3.4402e-04
Loss = 7.1731e-02, PNorm = 64.7600, GNorm = 0.5187, lr_0 = 3.4378e-04
Loss = 7.8450e-02, PNorm = 64.7670, GNorm = 0.4772, lr_0 = 3.4354e-04
Loss = 8.3022e-02, PNorm = 64.7736, GNorm = 0.9450, lr_0 = 3.4331e-04
Validation mae = 0.397069
Epoch 15
Loss = 6.4442e-02, PNorm = 64.7847, GNorm = 0.6056, lr_0 = 3.4307e-04
Loss = 7.6214e-02, PNorm = 64.7958, GNorm = 0.6280, lr_0 = 3.4284e-04
Loss = 5.7738e-02, PNorm = 64.8030, GNorm = 0.3974, lr_0 = 3.4260e-04
Loss = 6.2664e-02, PNorm = 64.8122, GNorm = 0.6043, lr_0 = 3.4237e-04
Loss = 6.6045e-02, PNorm = 64.8201, GNorm = 1.2624, lr_0 = 3.4213e-04
Loss = 7.7238e-02, PNorm = 64.8279, GNorm = 0.6421, lr_0 = 3.4190e-04
Loss = 6.6503e-02, PNorm = 64.8353, GNorm = 0.7740, lr_0 = 3.4167e-04
Loss = 7.1958e-02, PNorm = 64.8444, GNorm = 0.6979, lr_0 = 3.4143e-04
Loss = 6.8947e-02, PNorm = 64.8524, GNorm = 0.6297, lr_0 = 3.4120e-04
Loss = 6.2468e-02, PNorm = 64.8600, GNorm = 0.4886, lr_0 = 3.4096e-04
Loss = 6.3904e-02, PNorm = 64.8669, GNorm = 0.5982, lr_0 = 3.4073e-04
Loss = 7.6407e-02, PNorm = 64.8743, GNorm = 0.6258, lr_0 = 3.4050e-04
Loss = 6.7299e-02, PNorm = 64.8829, GNorm = 1.1863, lr_0 = 3.4026e-04
Loss = 6.1686e-02, PNorm = 64.8873, GNorm = 0.8987, lr_0 = 3.4003e-04
Loss = 6.2895e-02, PNorm = 64.8938, GNorm = 0.5581, lr_0 = 3.3980e-04
Loss = 6.5501e-02, PNorm = 64.8999, GNorm = 0.7976, lr_0 = 3.3956e-04
Loss = 5.8528e-02, PNorm = 64.9067, GNorm = 0.5410, lr_0 = 3.3933e-04
Loss = 6.0166e-02, PNorm = 64.9124, GNorm = 0.5675, lr_0 = 3.3910e-04
Loss = 6.5369e-02, PNorm = 64.9238, GNorm = 0.8754, lr_0 = 3.3887e-04
Loss = 6.7049e-02, PNorm = 64.9343, GNorm = 0.5120, lr_0 = 3.3864e-04
Loss = 6.4151e-02, PNorm = 64.9420, GNorm = 0.7619, lr_0 = 3.3840e-04
Loss = 6.1885e-02, PNorm = 64.9485, GNorm = 0.6285, lr_0 = 3.3817e-04
Loss = 5.9113e-02, PNorm = 64.9542, GNorm = 0.6495, lr_0 = 3.3794e-04
Loss = 7.1273e-02, PNorm = 64.9599, GNorm = 0.9674, lr_0 = 3.3771e-04
Loss = 6.5745e-02, PNorm = 64.9661, GNorm = 0.7676, lr_0 = 3.3748e-04
Loss = 5.4429e-02, PNorm = 64.9718, GNorm = 0.4991, lr_0 = 3.3725e-04
Loss = 6.3629e-02, PNorm = 64.9769, GNorm = 0.6422, lr_0 = 3.3701e-04
Loss = 5.5068e-02, PNorm = 64.9820, GNorm = 0.4333, lr_0 = 3.3678e-04
Loss = 6.0091e-02, PNorm = 64.9866, GNorm = 0.3228, lr_0 = 3.3655e-04
Loss = 6.9937e-02, PNorm = 64.9966, GNorm = 0.6961, lr_0 = 3.3632e-04
Loss = 6.5854e-02, PNorm = 65.0040, GNorm = 0.5467, lr_0 = 3.3609e-04
Loss = 6.1038e-02, PNorm = 65.0111, GNorm = 0.7448, lr_0 = 3.3586e-04
Loss = 6.4278e-02, PNorm = 65.0168, GNorm = 0.5990, lr_0 = 3.3563e-04
Loss = 7.1822e-02, PNorm = 65.0230, GNorm = 0.6134, lr_0 = 3.3540e-04
Loss = 7.0294e-02, PNorm = 65.0293, GNorm = 0.4211, lr_0 = 3.3517e-04
Loss = 6.1928e-02, PNorm = 65.0330, GNorm = 0.5573, lr_0 = 3.3494e-04
Loss = 5.6310e-02, PNorm = 65.0405, GNorm = 0.4347, lr_0 = 3.3471e-04
Loss = 6.4309e-02, PNorm = 65.0459, GNorm = 0.5730, lr_0 = 3.3448e-04
Loss = 6.1874e-02, PNorm = 65.0499, GNorm = 0.5859, lr_0 = 3.3425e-04
Loss = 7.2994e-02, PNorm = 65.0581, GNorm = 0.4803, lr_0 = 3.3403e-04
Loss = 6.2920e-02, PNorm = 65.0682, GNorm = 0.7988, lr_0 = 3.3380e-04
Loss = 7.1329e-02, PNorm = 65.0771, GNorm = 0.5919, lr_0 = 3.3357e-04
Loss = 6.8856e-02, PNorm = 65.0819, GNorm = 0.7473, lr_0 = 3.3334e-04
Loss = 7.2111e-02, PNorm = 65.0882, GNorm = 0.6130, lr_0 = 3.3311e-04
Loss = 6.2945e-02, PNorm = 65.0939, GNorm = 0.4632, lr_0 = 3.3288e-04
Loss = 6.0116e-02, PNorm = 65.1005, GNorm = 0.7978, lr_0 = 3.3265e-04
Loss = 5.4899e-02, PNorm = 65.1097, GNorm = 0.4527, lr_0 = 3.3243e-04
Loss = 6.0185e-02, PNorm = 65.1158, GNorm = 0.5151, lr_0 = 3.3220e-04
Loss = 7.1100e-02, PNorm = 65.1229, GNorm = 0.5692, lr_0 = 3.3197e-04
Loss = 7.2097e-02, PNorm = 65.1320, GNorm = 1.1894, lr_0 = 3.3174e-04
Loss = 8.2061e-02, PNorm = 65.1412, GNorm = 1.1854, lr_0 = 3.3152e-04
Loss = 7.4009e-02, PNorm = 65.1497, GNorm = 0.4807, lr_0 = 3.3129e-04
Loss = 6.7651e-02, PNorm = 65.1575, GNorm = 0.6905, lr_0 = 3.3106e-04
Loss = 6.8190e-02, PNorm = 65.1662, GNorm = 0.5716, lr_0 = 3.3084e-04
Loss = 8.0918e-02, PNorm = 65.1756, GNorm = 0.9502, lr_0 = 3.3061e-04
Loss = 6.5335e-02, PNorm = 65.1821, GNorm = 0.5751, lr_0 = 3.3038e-04
Loss = 6.1357e-02, PNorm = 65.1873, GNorm = 0.4105, lr_0 = 3.3016e-04
Loss = 6.9244e-02, PNorm = 65.1925, GNorm = 0.6213, lr_0 = 3.2993e-04
Loss = 6.7153e-02, PNorm = 65.1982, GNorm = 0.6539, lr_0 = 3.2970e-04
Loss = 6.9346e-02, PNorm = 65.2045, GNorm = 0.5044, lr_0 = 3.2948e-04
Loss = 5.8113e-02, PNorm = 65.2121, GNorm = 0.7695, lr_0 = 3.2925e-04
Loss = 6.6475e-02, PNorm = 65.2204, GNorm = 0.5353, lr_0 = 3.2903e-04
Loss = 6.6020e-02, PNorm = 65.2300, GNorm = 1.2029, lr_0 = 3.2880e-04
Loss = 6.3701e-02, PNorm = 65.2391, GNorm = 0.5737, lr_0 = 3.2858e-04
Loss = 6.6909e-02, PNorm = 65.2455, GNorm = 0.6347, lr_0 = 3.2835e-04
Loss = 6.7635e-02, PNorm = 65.2525, GNorm = 0.8393, lr_0 = 3.2813e-04
Loss = 6.5179e-02, PNorm = 65.2586, GNorm = 0.6217, lr_0 = 3.2790e-04
Loss = 7.0088e-02, PNorm = 65.2632, GNorm = 0.5947, lr_0 = 3.2768e-04
Loss = 6.9994e-02, PNorm = 65.2695, GNorm = 0.4608, lr_0 = 3.2745e-04
Loss = 5.9200e-02, PNorm = 65.2770, GNorm = 0.6416, lr_0 = 3.2723e-04
Loss = 7.2436e-02, PNorm = 65.2831, GNorm = 1.0314, lr_0 = 3.2700e-04
Loss = 6.1851e-02, PNorm = 65.2878, GNorm = 0.5390, lr_0 = 3.2678e-04
Loss = 7.2135e-02, PNorm = 65.2941, GNorm = 1.1825, lr_0 = 3.2656e-04
Loss = 5.7468e-02, PNorm = 65.2978, GNorm = 0.3994, lr_0 = 3.2633e-04
Loss = 6.8422e-02, PNorm = 65.3046, GNorm = 0.5520, lr_0 = 3.2611e-04
Loss = 7.1484e-02, PNorm = 65.3128, GNorm = 0.6037, lr_0 = 3.2589e-04
Loss = 6.7263e-02, PNorm = 65.3211, GNorm = 0.6358, lr_0 = 3.2566e-04
Loss = 6.0466e-02, PNorm = 65.3260, GNorm = 0.9789, lr_0 = 3.2544e-04
Loss = 6.6533e-02, PNorm = 65.3303, GNorm = 0.8476, lr_0 = 3.2522e-04
Loss = 6.3085e-02, PNorm = 65.3337, GNorm = 0.7931, lr_0 = 3.2499e-04
Loss = 7.4846e-02, PNorm = 65.3435, GNorm = 0.9338, lr_0 = 3.2477e-04
Loss = 7.3955e-02, PNorm = 65.3527, GNorm = 0.6165, lr_0 = 3.2455e-04
Loss = 6.7511e-02, PNorm = 65.3610, GNorm = 0.5518, lr_0 = 3.2433e-04
Loss = 6.7189e-02, PNorm = 65.3695, GNorm = 0.5551, lr_0 = 3.2410e-04
Loss = 5.6499e-02, PNorm = 65.3736, GNorm = 0.9499, lr_0 = 3.2388e-04
Loss = 6.5740e-02, PNorm = 65.3801, GNorm = 0.6427, lr_0 = 3.2366e-04
Loss = 6.5511e-02, PNorm = 65.3857, GNorm = 0.5983, lr_0 = 3.2344e-04
Loss = 6.6862e-02, PNorm = 65.3898, GNorm = 1.0966, lr_0 = 3.2322e-04
Loss = 6.5804e-02, PNorm = 65.3954, GNorm = 0.4640, lr_0 = 3.2300e-04
Loss = 6.4665e-02, PNorm = 65.3990, GNorm = 0.4335, lr_0 = 3.2277e-04
Loss = 5.9157e-02, PNorm = 65.4047, GNorm = 0.6089, lr_0 = 3.2255e-04
Loss = 5.9678e-02, PNorm = 65.4090, GNorm = 0.6573, lr_0 = 3.2233e-04
Loss = 7.0113e-02, PNorm = 65.4140, GNorm = 0.7149, lr_0 = 3.2211e-04
Loss = 6.8296e-02, PNorm = 65.4211, GNorm = 0.5426, lr_0 = 3.2189e-04
Loss = 7.0105e-02, PNorm = 65.4289, GNorm = 0.4576, lr_0 = 3.2167e-04
Loss = 6.2851e-02, PNorm = 65.4367, GNorm = 0.8654, lr_0 = 3.2145e-04
Loss = 6.7962e-02, PNorm = 65.4452, GNorm = 0.4757, lr_0 = 3.2123e-04
Loss = 5.9032e-02, PNorm = 65.4501, GNorm = 0.5832, lr_0 = 3.2101e-04
Loss = 6.6558e-02, PNorm = 65.4555, GNorm = 0.6632, lr_0 = 3.2079e-04
Loss = 7.4010e-02, PNorm = 65.4639, GNorm = 1.2385, lr_0 = 3.2057e-04
Loss = 7.2147e-02, PNorm = 65.4713, GNorm = 0.5711, lr_0 = 3.2035e-04
Loss = 6.6439e-02, PNorm = 65.4775, GNorm = 0.4516, lr_0 = 3.2013e-04
Loss = 7.0602e-02, PNorm = 65.4814, GNorm = 0.5594, lr_0 = 3.1991e-04
Loss = 6.3777e-02, PNorm = 65.4844, GNorm = 0.6568, lr_0 = 3.1969e-04
Loss = 6.6841e-02, PNorm = 65.4892, GNorm = 0.7116, lr_0 = 3.1947e-04
Loss = 7.1542e-02, PNorm = 65.4936, GNorm = 0.9745, lr_0 = 3.1925e-04
Loss = 7.0955e-02, PNorm = 65.4989, GNorm = 0.5154, lr_0 = 3.1904e-04
Loss = 6.6957e-02, PNorm = 65.5058, GNorm = 0.8543, lr_0 = 3.1882e-04
Loss = 7.1161e-02, PNorm = 65.5110, GNorm = 0.5613, lr_0 = 3.1860e-04
Loss = 6.7553e-02, PNorm = 65.5166, GNorm = 0.5935, lr_0 = 3.1838e-04
Loss = 7.0892e-02, PNorm = 65.5219, GNorm = 0.5382, lr_0 = 3.1816e-04
Loss = 6.8636e-02, PNorm = 65.5291, GNorm = 0.5855, lr_0 = 3.1794e-04
Loss = 6.1963e-02, PNorm = 65.5371, GNorm = 0.7013, lr_0 = 3.1773e-04
Loss = 6.1751e-02, PNorm = 65.5426, GNorm = 0.6317, lr_0 = 3.1751e-04
Loss = 6.3868e-02, PNorm = 65.5477, GNorm = 0.4267, lr_0 = 3.1729e-04
Loss = 6.4208e-02, PNorm = 65.5551, GNorm = 0.5309, lr_0 = 3.1707e-04
Loss = 5.4519e-02, PNorm = 65.5599, GNorm = 0.4407, lr_0 = 3.1686e-04
Loss = 7.1584e-02, PNorm = 65.5656, GNorm = 0.7697, lr_0 = 3.1664e-04
Loss = 7.1609e-02, PNorm = 65.5712, GNorm = 0.4758, lr_0 = 3.1642e-04
Loss = 6.4610e-02, PNorm = 65.5776, GNorm = 0.5512, lr_0 = 3.1621e-04
Validation mae = 0.400236
Epoch 16
Loss = 5.6084e-02, PNorm = 65.5832, GNorm = 0.4547, lr_0 = 3.1599e-04
Loss = 6.6897e-02, PNorm = 65.5892, GNorm = 0.5015, lr_0 = 3.1577e-04
Loss = 4.8111e-02, PNorm = 65.5952, GNorm = 0.3953, lr_0 = 3.1556e-04
Loss = 4.7888e-02, PNorm = 65.6008, GNorm = 0.3745, lr_0 = 3.1534e-04
Loss = 5.7679e-02, PNorm = 65.6049, GNorm = 0.5630, lr_0 = 3.1512e-04
Loss = 5.6926e-02, PNorm = 65.6103, GNorm = 0.4997, lr_0 = 3.1491e-04
Loss = 5.3359e-02, PNorm = 65.6166, GNorm = 0.4889, lr_0 = 3.1469e-04
Loss = 6.5620e-02, PNorm = 65.6237, GNorm = 0.5155, lr_0 = 3.1448e-04
Loss = 5.3161e-02, PNorm = 65.6319, GNorm = 0.7472, lr_0 = 3.1426e-04
Loss = 5.9681e-02, PNorm = 65.6369, GNorm = 0.5476, lr_0 = 3.1405e-04
Loss = 7.1221e-02, PNorm = 65.6421, GNorm = 0.8284, lr_0 = 3.1383e-04
Loss = 5.8627e-02, PNorm = 65.6467, GNorm = 0.8058, lr_0 = 3.1362e-04
Loss = 5.7839e-02, PNorm = 65.6523, GNorm = 0.6061, lr_0 = 3.1340e-04
Loss = 6.0630e-02, PNorm = 65.6604, GNorm = 0.8420, lr_0 = 3.1319e-04
Loss = 5.9016e-02, PNorm = 65.6640, GNorm = 0.4596, lr_0 = 3.1297e-04
Loss = 6.0368e-02, PNorm = 65.6709, GNorm = 0.4411, lr_0 = 3.1276e-04
Loss = 6.0459e-02, PNorm = 65.6787, GNorm = 0.4986, lr_0 = 3.1254e-04
Loss = 5.8551e-02, PNorm = 65.6838, GNorm = 0.5375, lr_0 = 3.1233e-04
Loss = 6.5332e-02, PNorm = 65.6900, GNorm = 0.6785, lr_0 = 3.1212e-04
Loss = 6.2385e-02, PNorm = 65.6970, GNorm = 0.4471, lr_0 = 3.1190e-04
Loss = 6.1037e-02, PNorm = 65.7030, GNorm = 0.5430, lr_0 = 3.1169e-04
Loss = 5.3088e-02, PNorm = 65.7080, GNorm = 0.4983, lr_0 = 3.1147e-04
Loss = 5.3932e-02, PNorm = 65.7131, GNorm = 0.7275, lr_0 = 3.1126e-04
Loss = 5.9470e-02, PNorm = 65.7207, GNorm = 0.6381, lr_0 = 3.1105e-04
Loss = 6.7128e-02, PNorm = 65.7267, GNorm = 0.5333, lr_0 = 3.1083e-04
Loss = 6.3965e-02, PNorm = 65.7297, GNorm = 0.6549, lr_0 = 3.1062e-04
Loss = 6.0073e-02, PNorm = 65.7319, GNorm = 0.5567, lr_0 = 3.1041e-04
Loss = 6.3496e-02, PNorm = 65.7402, GNorm = 0.5821, lr_0 = 3.1020e-04
Loss = 6.9889e-02, PNorm = 65.7501, GNorm = 0.8428, lr_0 = 3.0998e-04
Loss = 6.8729e-02, PNorm = 65.7582, GNorm = 0.6510, lr_0 = 3.0977e-04
Loss = 7.5932e-02, PNorm = 65.7687, GNorm = 1.1177, lr_0 = 3.0956e-04
Loss = 6.0549e-02, PNorm = 65.7790, GNorm = 0.4778, lr_0 = 3.0935e-04
Loss = 7.2121e-02, PNorm = 65.7879, GNorm = 0.6435, lr_0 = 3.0914e-04
Loss = 6.6138e-02, PNorm = 65.7975, GNorm = 0.5168, lr_0 = 3.0892e-04
Loss = 6.7529e-02, PNorm = 65.8055, GNorm = 0.6059, lr_0 = 3.0871e-04
Loss = 6.3771e-02, PNorm = 65.8119, GNorm = 0.9881, lr_0 = 3.0850e-04
Loss = 5.5154e-02, PNorm = 65.8172, GNorm = 0.6227, lr_0 = 3.0829e-04
Loss = 6.2977e-02, PNorm = 65.8199, GNorm = 0.6679, lr_0 = 3.0808e-04
Loss = 5.1441e-02, PNorm = 65.8228, GNorm = 0.4514, lr_0 = 3.0787e-04
Loss = 5.4131e-02, PNorm = 65.8259, GNorm = 0.8370, lr_0 = 3.0766e-04
Loss = 6.6426e-02, PNorm = 65.8326, GNorm = 0.9968, lr_0 = 3.0745e-04
Loss = 5.4133e-02, PNorm = 65.8427, GNorm = 0.5216, lr_0 = 3.0723e-04
Loss = 6.1563e-02, PNorm = 65.8514, GNorm = 0.7022, lr_0 = 3.0702e-04
Loss = 6.0095e-02, PNorm = 65.8580, GNorm = 0.6101, lr_0 = 3.0681e-04
Loss = 6.6375e-02, PNorm = 65.8670, GNorm = 0.5516, lr_0 = 3.0660e-04
Loss = 7.5006e-02, PNorm = 65.8744, GNorm = 0.8575, lr_0 = 3.0639e-04
Loss = 6.3407e-02, PNorm = 65.8820, GNorm = 0.7118, lr_0 = 3.0618e-04
Loss = 6.3938e-02, PNorm = 65.8897, GNorm = 0.4226, lr_0 = 3.0597e-04
Loss = 5.6165e-02, PNorm = 65.8996, GNorm = 0.4665, lr_0 = 3.0576e-04
Loss = 6.1581e-02, PNorm = 65.9046, GNorm = 0.7752, lr_0 = 3.0555e-04
Loss = 6.3888e-02, PNorm = 65.9093, GNorm = 0.5277, lr_0 = 3.0535e-04
Loss = 6.0737e-02, PNorm = 65.9166, GNorm = 0.7268, lr_0 = 3.0514e-04
Loss = 5.7448e-02, PNorm = 65.9227, GNorm = 0.6925, lr_0 = 3.0493e-04
Loss = 6.4850e-02, PNorm = 65.9294, GNorm = 0.7488, lr_0 = 3.0472e-04
Loss = 6.8461e-02, PNorm = 65.9344, GNorm = 0.5046, lr_0 = 3.0451e-04
Loss = 5.8216e-02, PNorm = 65.9373, GNorm = 0.5219, lr_0 = 3.0430e-04
Loss = 6.5384e-02, PNorm = 65.9406, GNorm = 0.7562, lr_0 = 3.0409e-04
Loss = 5.6011e-02, PNorm = 65.9449, GNorm = 0.4694, lr_0 = 3.0388e-04
Loss = 6.1601e-02, PNorm = 65.9529, GNorm = 0.5969, lr_0 = 3.0368e-04
Loss = 6.5152e-02, PNorm = 65.9603, GNorm = 0.7027, lr_0 = 3.0347e-04
Loss = 5.9808e-02, PNorm = 65.9662, GNorm = 0.7732, lr_0 = 3.0326e-04
Loss = 6.2462e-02, PNorm = 65.9722, GNorm = 0.4573, lr_0 = 3.0305e-04
Loss = 6.1928e-02, PNorm = 65.9805, GNorm = 0.5394, lr_0 = 3.0284e-04
Loss = 6.2488e-02, PNorm = 65.9870, GNorm = 0.5344, lr_0 = 3.0264e-04
Loss = 5.6354e-02, PNorm = 65.9935, GNorm = 0.5164, lr_0 = 3.0243e-04
Loss = 7.0727e-02, PNorm = 65.9993, GNorm = 0.6843, lr_0 = 3.0222e-04
Loss = 5.7582e-02, PNorm = 66.0059, GNorm = 0.6137, lr_0 = 3.0202e-04
Loss = 7.6190e-02, PNorm = 66.0138, GNorm = 1.4272, lr_0 = 3.0181e-04
Loss = 6.3163e-02, PNorm = 66.0223, GNorm = 0.5160, lr_0 = 3.0160e-04
Loss = 6.0030e-02, PNorm = 66.0306, GNorm = 0.6537, lr_0 = 3.0140e-04
Loss = 6.5210e-02, PNorm = 66.0346, GNorm = 0.7035, lr_0 = 3.0119e-04
Loss = 6.0837e-02, PNorm = 66.0387, GNorm = 1.1001, lr_0 = 3.0098e-04
Loss = 5.5890e-02, PNorm = 66.0432, GNorm = 0.5068, lr_0 = 3.0078e-04
Loss = 6.3419e-02, PNorm = 66.0500, GNorm = 0.6936, lr_0 = 3.0057e-04
Loss = 6.2567e-02, PNorm = 66.0589, GNorm = 0.7044, lr_0 = 3.0036e-04
Loss = 6.6549e-02, PNorm = 66.0653, GNorm = 0.5670, lr_0 = 3.0016e-04
Loss = 6.7752e-02, PNorm = 66.0709, GNorm = 0.7184, lr_0 = 2.9995e-04
Loss = 5.8501e-02, PNorm = 66.0761, GNorm = 0.5755, lr_0 = 2.9975e-04
Loss = 6.4145e-02, PNorm = 66.0819, GNorm = 0.5964, lr_0 = 2.9954e-04
Loss = 6.0212e-02, PNorm = 66.0893, GNorm = 0.9135, lr_0 = 2.9934e-04
Loss = 7.8777e-02, PNorm = 66.0943, GNorm = 0.7227, lr_0 = 2.9913e-04
Loss = 7.4742e-02, PNorm = 66.0996, GNorm = 0.4303, lr_0 = 2.9893e-04
Loss = 6.1898e-02, PNorm = 66.1063, GNorm = 0.5556, lr_0 = 2.9872e-04
Loss = 5.8880e-02, PNorm = 66.1106, GNorm = 0.4258, lr_0 = 2.9852e-04
Loss = 5.8387e-02, PNorm = 66.1157, GNorm = 0.5281, lr_0 = 2.9831e-04
Loss = 5.9971e-02, PNorm = 66.1201, GNorm = 0.7076, lr_0 = 2.9811e-04
Loss = 6.7590e-02, PNorm = 66.1250, GNorm = 0.8117, lr_0 = 2.9790e-04
Loss = 6.2210e-02, PNorm = 66.1288, GNorm = 0.7521, lr_0 = 2.9770e-04
Loss = 6.4976e-02, PNorm = 66.1332, GNorm = 0.4376, lr_0 = 2.9750e-04
Loss = 5.8812e-02, PNorm = 66.1396, GNorm = 0.4618, lr_0 = 2.9729e-04
Loss = 6.3392e-02, PNorm = 66.1447, GNorm = 0.5392, lr_0 = 2.9709e-04
Loss = 5.6502e-02, PNorm = 66.1474, GNorm = 0.5521, lr_0 = 2.9689e-04
Loss = 6.0136e-02, PNorm = 66.1514, GNorm = 0.4666, lr_0 = 2.9668e-04
Loss = 6.0940e-02, PNorm = 66.1556, GNorm = 0.5372, lr_0 = 2.9648e-04
Loss = 7.2518e-02, PNorm = 66.1573, GNorm = 0.8115, lr_0 = 2.9628e-04
Loss = 6.6613e-02, PNorm = 66.1633, GNorm = 0.5339, lr_0 = 2.9607e-04
Loss = 6.1262e-02, PNorm = 66.1682, GNorm = 0.7612, lr_0 = 2.9587e-04
Loss = 6.1866e-02, PNorm = 66.1754, GNorm = 0.5638, lr_0 = 2.9567e-04
Loss = 5.5544e-02, PNorm = 66.1842, GNorm = 0.5527, lr_0 = 2.9546e-04
Loss = 6.0479e-02, PNorm = 66.1896, GNorm = 0.5698, lr_0 = 2.9526e-04
Loss = 6.0977e-02, PNorm = 66.1961, GNorm = 0.5702, lr_0 = 2.9506e-04
Loss = 6.2262e-02, PNorm = 66.2014, GNorm = 0.6707, lr_0 = 2.9486e-04
Loss = 5.7496e-02, PNorm = 66.2067, GNorm = 0.8319, lr_0 = 2.9466e-04
Loss = 7.5740e-02, PNorm = 66.2120, GNorm = 0.6924, lr_0 = 2.9445e-04
Loss = 7.7398e-02, PNorm = 66.2187, GNorm = 0.4870, lr_0 = 2.9425e-04
Loss = 6.8937e-02, PNorm = 66.2229, GNorm = 1.0048, lr_0 = 2.9405e-04
Loss = 6.7060e-02, PNorm = 66.2299, GNorm = 0.7685, lr_0 = 2.9385e-04
Loss = 5.7602e-02, PNorm = 66.2367, GNorm = 0.5229, lr_0 = 2.9365e-04
Loss = 7.0286e-02, PNorm = 66.2394, GNorm = 0.5209, lr_0 = 2.9345e-04
Loss = 6.5612e-02, PNorm = 66.2430, GNorm = 0.6169, lr_0 = 2.9325e-04
Loss = 5.7459e-02, PNorm = 66.2456, GNorm = 0.5833, lr_0 = 2.9305e-04
Loss = 6.4222e-02, PNorm = 66.2508, GNorm = 0.5484, lr_0 = 2.9284e-04
Loss = 5.3449e-02, PNorm = 66.2564, GNorm = 0.5447, lr_0 = 2.9264e-04
Loss = 5.9181e-02, PNorm = 66.2583, GNorm = 0.7727, lr_0 = 2.9244e-04
Loss = 6.0671e-02, PNorm = 66.2596, GNorm = 0.4980, lr_0 = 2.9224e-04
Loss = 6.0010e-02, PNorm = 66.2644, GNorm = 0.6968, lr_0 = 2.9204e-04
Loss = 7.0420e-02, PNorm = 66.2708, GNorm = 0.8770, lr_0 = 2.9184e-04
Loss = 7.0068e-02, PNorm = 66.2790, GNorm = 0.7173, lr_0 = 2.9164e-04
Loss = 6.3679e-02, PNorm = 66.2851, GNorm = 0.6994, lr_0 = 2.9144e-04
Loss = 6.5747e-02, PNorm = 66.2906, GNorm = 0.6518, lr_0 = 2.9124e-04
Validation mae = 0.408227
Epoch 17
Loss = 5.8408e-02, PNorm = 66.2939, GNorm = 0.5016, lr_0 = 2.9104e-04
Loss = 5.5101e-02, PNorm = 66.3000, GNorm = 0.5398, lr_0 = 2.9084e-04
Loss = 5.5872e-02, PNorm = 66.3051, GNorm = 0.5197, lr_0 = 2.9065e-04
Loss = 5.8935e-02, PNorm = 66.3095, GNorm = 0.8014, lr_0 = 2.9045e-04
Loss = 5.8595e-02, PNorm = 66.3151, GNorm = 0.8193, lr_0 = 2.9025e-04
Loss = 5.7946e-02, PNorm = 66.3216, GNorm = 1.1260, lr_0 = 2.9005e-04
Loss = 4.6107e-02, PNorm = 66.3284, GNorm = 0.7055, lr_0 = 2.8985e-04
Loss = 6.8363e-02, PNorm = 66.3347, GNorm = 0.6762, lr_0 = 2.8965e-04
Loss = 5.8075e-02, PNorm = 66.3419, GNorm = 0.5553, lr_0 = 2.8945e-04
Loss = 5.3870e-02, PNorm = 66.3484, GNorm = 1.4899, lr_0 = 2.8925e-04
Loss = 5.0967e-02, PNorm = 66.3540, GNorm = 0.5879, lr_0 = 2.8906e-04
Loss = 5.3548e-02, PNorm = 66.3580, GNorm = 0.8702, lr_0 = 2.8886e-04
Loss = 4.9096e-02, PNorm = 66.3613, GNorm = 0.2753, lr_0 = 2.8866e-04
Loss = 5.6442e-02, PNorm = 66.3648, GNorm = 0.7354, lr_0 = 2.8846e-04
Loss = 5.3274e-02, PNorm = 66.3681, GNorm = 0.4756, lr_0 = 2.8826e-04
Loss = 6.2665e-02, PNorm = 66.3743, GNorm = 0.3931, lr_0 = 2.8807e-04
Loss = 5.8829e-02, PNorm = 66.3846, GNorm = 0.6670, lr_0 = 2.8787e-04
Loss = 5.3163e-02, PNorm = 66.3925, GNorm = 0.8725, lr_0 = 2.8767e-04
Loss = 5.4697e-02, PNorm = 66.3975, GNorm = 0.5369, lr_0 = 2.8748e-04
Loss = 6.0899e-02, PNorm = 66.4020, GNorm = 0.6537, lr_0 = 2.8728e-04
Loss = 6.0991e-02, PNorm = 66.4085, GNorm = 0.7288, lr_0 = 2.8708e-04
Loss = 5.1332e-02, PNorm = 66.4174, GNorm = 0.6217, lr_0 = 2.8689e-04
Loss = 5.6640e-02, PNorm = 66.4234, GNorm = 0.4550, lr_0 = 2.8669e-04
Loss = 4.5618e-02, PNorm = 66.4295, GNorm = 0.6511, lr_0 = 2.8649e-04
Loss = 6.0129e-02, PNorm = 66.4332, GNorm = 0.7436, lr_0 = 2.8630e-04
Loss = 5.7294e-02, PNorm = 66.4353, GNorm = 0.6224, lr_0 = 2.8610e-04
Loss = 5.3657e-02, PNorm = 66.4414, GNorm = 0.5448, lr_0 = 2.8590e-04
Loss = 6.4042e-02, PNorm = 66.4484, GNorm = 0.6250, lr_0 = 2.8571e-04
Loss = 5.6936e-02, PNorm = 66.4511, GNorm = 0.5185, lr_0 = 2.8551e-04
Loss = 5.6580e-02, PNorm = 66.4566, GNorm = 0.8803, lr_0 = 2.8532e-04
Loss = 5.6678e-02, PNorm = 66.4627, GNorm = 0.6995, lr_0 = 2.8512e-04
Loss = 5.9435e-02, PNorm = 66.4671, GNorm = 0.3837, lr_0 = 2.8493e-04
Loss = 6.1939e-02, PNorm = 66.4728, GNorm = 0.5652, lr_0 = 2.8473e-04
Loss = 6.2811e-02, PNorm = 66.4785, GNorm = 0.4791, lr_0 = 2.8454e-04
Loss = 6.6051e-02, PNorm = 66.4845, GNorm = 0.4182, lr_0 = 2.8434e-04
Loss = 5.9454e-02, PNorm = 66.4902, GNorm = 0.7948, lr_0 = 2.8415e-04
Loss = 5.4944e-02, PNorm = 66.4974, GNorm = 0.5686, lr_0 = 2.8395e-04
Loss = 5.7213e-02, PNorm = 66.5046, GNorm = 0.7191, lr_0 = 2.8376e-04
Loss = 6.0822e-02, PNorm = 66.5111, GNorm = 0.6809, lr_0 = 2.8356e-04
Loss = 5.8901e-02, PNorm = 66.5153, GNorm = 0.5852, lr_0 = 2.8337e-04
Loss = 5.5680e-02, PNorm = 66.5169, GNorm = 0.7194, lr_0 = 2.8317e-04
Loss = 6.5682e-02, PNorm = 66.5204, GNorm = 0.8080, lr_0 = 2.8298e-04
Loss = 5.7544e-02, PNorm = 66.5264, GNorm = 0.5046, lr_0 = 2.8279e-04
Loss = 5.7548e-02, PNorm = 66.5329, GNorm = 0.7930, lr_0 = 2.8259e-04
Loss = 5.6924e-02, PNorm = 66.5357, GNorm = 0.4953, lr_0 = 2.8240e-04
Loss = 6.0010e-02, PNorm = 66.5400, GNorm = 0.5238, lr_0 = 2.8221e-04
Loss = 5.5915e-02, PNorm = 66.5465, GNorm = 0.7124, lr_0 = 2.8201e-04
Loss = 5.1211e-02, PNorm = 66.5488, GNorm = 0.5916, lr_0 = 2.8182e-04
Loss = 6.3496e-02, PNorm = 66.5542, GNorm = 0.8954, lr_0 = 2.8163e-04
Loss = 5.7188e-02, PNorm = 66.5613, GNorm = 0.5822, lr_0 = 2.8143e-04
Loss = 5.6074e-02, PNorm = 66.5666, GNorm = 0.8987, lr_0 = 2.8124e-04
Loss = 6.5916e-02, PNorm = 66.5707, GNorm = 0.6389, lr_0 = 2.8105e-04
Loss = 6.3994e-02, PNorm = 66.5774, GNorm = 0.5008, lr_0 = 2.8085e-04
Loss = 6.2207e-02, PNorm = 66.5815, GNorm = 0.5782, lr_0 = 2.8066e-04
Loss = 5.6545e-02, PNorm = 66.5887, GNorm = 0.4522, lr_0 = 2.8047e-04
Loss = 6.1415e-02, PNorm = 66.5948, GNorm = 0.4026, lr_0 = 2.8028e-04
Loss = 4.7728e-02, PNorm = 66.5996, GNorm = 0.4523, lr_0 = 2.8009e-04
Loss = 5.9839e-02, PNorm = 66.6014, GNorm = 0.6264, lr_0 = 2.7989e-04
Loss = 6.2308e-02, PNorm = 66.6054, GNorm = 0.5336, lr_0 = 2.7970e-04
Loss = 6.6756e-02, PNorm = 66.6085, GNorm = 0.4981, lr_0 = 2.7951e-04
Loss = 5.8337e-02, PNorm = 66.6149, GNorm = 0.4941, lr_0 = 2.7932e-04
Loss = 5.0969e-02, PNorm = 66.6214, GNorm = 0.8606, lr_0 = 2.7913e-04
Loss = 6.4901e-02, PNorm = 66.6249, GNorm = 0.8490, lr_0 = 2.7894e-04
Loss = 6.7059e-02, PNorm = 66.6297, GNorm = 0.6057, lr_0 = 2.7875e-04
Loss = 6.0464e-02, PNorm = 66.6369, GNorm = 0.7601, lr_0 = 2.7855e-04
Loss = 5.8306e-02, PNorm = 66.6422, GNorm = 0.5643, lr_0 = 2.7836e-04
Loss = 5.6680e-02, PNorm = 66.6474, GNorm = 0.4336, lr_0 = 2.7817e-04
Loss = 6.6766e-02, PNorm = 66.6512, GNorm = 0.5760, lr_0 = 2.7798e-04
Loss = 5.1774e-02, PNorm = 66.6568, GNorm = 0.5394, lr_0 = 2.7779e-04
Loss = 5.1607e-02, PNorm = 66.6614, GNorm = 0.5975, lr_0 = 2.7760e-04
Loss = 6.3202e-02, PNorm = 66.6666, GNorm = 0.5593, lr_0 = 2.7741e-04
Loss = 6.1859e-02, PNorm = 66.6742, GNorm = 0.4735, lr_0 = 2.7722e-04
Loss = 5.6097e-02, PNorm = 66.6799, GNorm = 0.9156, lr_0 = 2.7703e-04
Loss = 5.2739e-02, PNorm = 66.6846, GNorm = 0.6945, lr_0 = 2.7684e-04
Loss = 5.8271e-02, PNorm = 66.6888, GNorm = 0.4741, lr_0 = 2.7665e-04
Loss = 5.9599e-02, PNorm = 66.6935, GNorm = 0.6867, lr_0 = 2.7646e-04
Loss = 5.7784e-02, PNorm = 66.6976, GNorm = 0.5294, lr_0 = 2.7627e-04
Loss = 6.4552e-02, PNorm = 66.7039, GNorm = 1.0166, lr_0 = 2.7608e-04
Loss = 5.3058e-02, PNorm = 66.7097, GNorm = 0.4405, lr_0 = 2.7590e-04
Loss = 6.3505e-02, PNorm = 66.7148, GNorm = 0.7364, lr_0 = 2.7571e-04
Loss = 5.1704e-02, PNorm = 66.7202, GNorm = 0.5295, lr_0 = 2.7552e-04
Loss = 5.8914e-02, PNorm = 66.7236, GNorm = 0.4285, lr_0 = 2.7533e-04
Loss = 5.2019e-02, PNorm = 66.7287, GNorm = 0.5836, lr_0 = 2.7514e-04
Loss = 5.3085e-02, PNorm = 66.7329, GNorm = 0.4566, lr_0 = 2.7495e-04
Loss = 6.0227e-02, PNorm = 66.7378, GNorm = 0.6594, lr_0 = 2.7476e-04
Loss = 5.6605e-02, PNorm = 66.7415, GNorm = 0.7258, lr_0 = 2.7457e-04
Loss = 5.4678e-02, PNorm = 66.7434, GNorm = 0.9523, lr_0 = 2.7439e-04
Loss = 5.9807e-02, PNorm = 66.7480, GNorm = 1.1184, lr_0 = 2.7420e-04
Loss = 6.3097e-02, PNorm = 66.7529, GNorm = 0.5649, lr_0 = 2.7401e-04
Loss = 5.7022e-02, PNorm = 66.7588, GNorm = 0.5870, lr_0 = 2.7382e-04
Loss = 5.7889e-02, PNorm = 66.7643, GNorm = 0.4829, lr_0 = 2.7364e-04
Loss = 5.8720e-02, PNorm = 66.7715, GNorm = 0.5939, lr_0 = 2.7345e-04
Loss = 6.4864e-02, PNorm = 66.7770, GNorm = 0.7563, lr_0 = 2.7326e-04
Loss = 6.3734e-02, PNorm = 66.7825, GNorm = 0.7528, lr_0 = 2.7307e-04
Loss = 6.1766e-02, PNorm = 66.7846, GNorm = 0.8865, lr_0 = 2.7289e-04
Loss = 6.2295e-02, PNorm = 66.7888, GNorm = 0.7665, lr_0 = 2.7270e-04
Loss = 5.9800e-02, PNorm = 66.7949, GNorm = 0.3969, lr_0 = 2.7251e-04
Loss = 6.2543e-02, PNorm = 66.8022, GNorm = 0.5903, lr_0 = 2.7233e-04
Loss = 5.7333e-02, PNorm = 66.8063, GNorm = 0.7266, lr_0 = 2.7214e-04
Loss = 5.9820e-02, PNorm = 66.8128, GNorm = 0.7412, lr_0 = 2.7195e-04
Loss = 6.5466e-02, PNorm = 66.8190, GNorm = 0.5438, lr_0 = 2.7177e-04
Loss = 5.7314e-02, PNorm = 66.8218, GNorm = 0.9005, lr_0 = 2.7158e-04
Loss = 5.5432e-02, PNorm = 66.8242, GNorm = 0.4721, lr_0 = 2.7139e-04
Loss = 6.1157e-02, PNorm = 66.8302, GNorm = 0.5326, lr_0 = 2.7121e-04
Loss = 6.3758e-02, PNorm = 66.8347, GNorm = 0.5455, lr_0 = 2.7102e-04
Loss = 6.1319e-02, PNorm = 66.8395, GNorm = 0.4947, lr_0 = 2.7084e-04
Loss = 7.0012e-02, PNorm = 66.8451, GNorm = 0.6052, lr_0 = 2.7065e-04
Loss = 5.9447e-02, PNorm = 66.8499, GNorm = 0.5967, lr_0 = 2.7047e-04
Loss = 5.8421e-02, PNorm = 66.8536, GNorm = 0.5467, lr_0 = 2.7028e-04
Loss = 6.5934e-02, PNorm = 66.8571, GNorm = 0.5171, lr_0 = 2.7010e-04
Loss = 4.8049e-02, PNorm = 66.8607, GNorm = 0.6786, lr_0 = 2.6991e-04
Loss = 5.3683e-02, PNorm = 66.8652, GNorm = 0.8639, lr_0 = 2.6973e-04
Loss = 6.0210e-02, PNorm = 66.8720, GNorm = 0.4240, lr_0 = 2.6954e-04
Loss = 5.3364e-02, PNorm = 66.8781, GNorm = 0.8530, lr_0 = 2.6936e-04
Loss = 6.3701e-02, PNorm = 66.8832, GNorm = 0.8920, lr_0 = 2.6917e-04
Loss = 6.3845e-02, PNorm = 66.8868, GNorm = 0.5004, lr_0 = 2.6899e-04
Loss = 6.0340e-02, PNorm = 66.8907, GNorm = 0.6446, lr_0 = 2.6880e-04
Loss = 6.1453e-02, PNorm = 66.8965, GNorm = 0.6016, lr_0 = 2.6862e-04
Loss = 5.8195e-02, PNorm = 66.9006, GNorm = 0.6409, lr_0 = 2.6844e-04
Loss = 6.6193e-02, PNorm = 66.9033, GNorm = 0.8439, lr_0 = 2.6825e-04
Validation mae = 0.382914
Epoch 18
Loss = 4.6504e-02, PNorm = 66.9082, GNorm = 0.5819, lr_0 = 2.6807e-04
Loss = 6.2174e-02, PNorm = 66.9137, GNorm = 0.8287, lr_0 = 2.6788e-04
Loss = 4.9025e-02, PNorm = 66.9190, GNorm = 0.6221, lr_0 = 2.6770e-04
Loss = 5.5330e-02, PNorm = 66.9261, GNorm = 0.6973, lr_0 = 2.6752e-04
Loss = 5.4839e-02, PNorm = 66.9340, GNorm = 0.5165, lr_0 = 2.6733e-04
Loss = 4.5319e-02, PNorm = 66.9404, GNorm = 0.4945, lr_0 = 2.6715e-04
Loss = 4.5749e-02, PNorm = 66.9434, GNorm = 0.5039, lr_0 = 2.6697e-04
Loss = 5.5076e-02, PNorm = 66.9481, GNorm = 0.5034, lr_0 = 2.6678e-04
Loss = 5.1233e-02, PNorm = 66.9533, GNorm = 0.8810, lr_0 = 2.6660e-04
Loss = 5.2841e-02, PNorm = 66.9573, GNorm = 0.4197, lr_0 = 2.6642e-04
Loss = 5.4027e-02, PNorm = 66.9619, GNorm = 0.4307, lr_0 = 2.6624e-04
Loss = 4.2396e-02, PNorm = 66.9666, GNorm = 0.4912, lr_0 = 2.6605e-04
Loss = 5.6655e-02, PNorm = 66.9729, GNorm = 0.4259, lr_0 = 2.6587e-04
Loss = 5.1574e-02, PNorm = 66.9760, GNorm = 0.5721, lr_0 = 2.6569e-04
Loss = 5.1974e-02, PNorm = 66.9815, GNorm = 0.5819, lr_0 = 2.6551e-04
Loss = 5.1743e-02, PNorm = 66.9894, GNorm = 0.4820, lr_0 = 2.6533e-04
Loss = 5.6302e-02, PNorm = 66.9968, GNorm = 0.6469, lr_0 = 2.6514e-04
Loss = 5.1493e-02, PNorm = 67.0025, GNorm = 0.5389, lr_0 = 2.6496e-04
Loss = 4.9030e-02, PNorm = 67.0058, GNorm = 0.6008, lr_0 = 2.6478e-04
Loss = 4.7634e-02, PNorm = 67.0083, GNorm = 0.4705, lr_0 = 2.6460e-04
Loss = 5.1152e-02, PNorm = 67.0135, GNorm = 0.7687, lr_0 = 2.6442e-04
Loss = 5.1169e-02, PNorm = 67.0189, GNorm = 0.5526, lr_0 = 2.6424e-04
Loss = 4.6887e-02, PNorm = 67.0219, GNorm = 0.6125, lr_0 = 2.6406e-04
Loss = 5.1094e-02, PNorm = 67.0265, GNorm = 0.4712, lr_0 = 2.6388e-04
Loss = 4.9036e-02, PNorm = 67.0318, GNorm = 0.3998, lr_0 = 2.6369e-04
Loss = 4.8725e-02, PNorm = 67.0362, GNorm = 0.8815, lr_0 = 2.6351e-04
Loss = 5.8447e-02, PNorm = 67.0417, GNorm = 0.8825, lr_0 = 2.6333e-04
Loss = 5.7400e-02, PNorm = 67.0497, GNorm = 0.5024, lr_0 = 2.6315e-04
Loss = 5.0487e-02, PNorm = 67.0543, GNorm = 0.5681, lr_0 = 2.6297e-04
Loss = 5.5033e-02, PNorm = 67.0592, GNorm = 0.9490, lr_0 = 2.6279e-04
Loss = 5.4129e-02, PNorm = 67.0654, GNorm = 0.5544, lr_0 = 2.6261e-04
Loss = 5.7202e-02, PNorm = 67.0674, GNorm = 0.5442, lr_0 = 2.6243e-04
Loss = 5.4362e-02, PNorm = 67.0720, GNorm = 0.4244, lr_0 = 2.6225e-04
Loss = 5.5967e-02, PNorm = 67.0771, GNorm = 0.5583, lr_0 = 2.6207e-04
Loss = 5.2700e-02, PNorm = 67.0814, GNorm = 0.5621, lr_0 = 2.6189e-04
Loss = 5.6904e-02, PNorm = 67.0859, GNorm = 0.6480, lr_0 = 2.6171e-04
Loss = 4.8904e-02, PNorm = 67.0906, GNorm = 0.3783, lr_0 = 2.6153e-04
Loss = 5.2858e-02, PNorm = 67.0939, GNorm = 0.5162, lr_0 = 2.6136e-04
Loss = 5.3392e-02, PNorm = 67.1004, GNorm = 0.5755, lr_0 = 2.6118e-04
Loss = 5.6335e-02, PNorm = 67.1061, GNorm = 0.4567, lr_0 = 2.6100e-04
Loss = 6.0092e-02, PNorm = 67.1132, GNorm = 0.6405, lr_0 = 2.6082e-04
Loss = 6.5916e-02, PNorm = 67.1169, GNorm = 0.6619, lr_0 = 2.6064e-04
Loss = 5.2388e-02, PNorm = 67.1231, GNorm = 0.6650, lr_0 = 2.6046e-04
Loss = 4.7678e-02, PNorm = 67.1270, GNorm = 0.3597, lr_0 = 2.6028e-04
Loss = 4.9247e-02, PNorm = 67.1302, GNorm = 0.4480, lr_0 = 2.6011e-04
Loss = 5.8878e-02, PNorm = 67.1325, GNorm = 0.4845, lr_0 = 2.5993e-04
Loss = 5.6765e-02, PNorm = 67.1351, GNorm = 0.8191, lr_0 = 2.5975e-04
Loss = 4.9934e-02, PNorm = 67.1384, GNorm = 0.7061, lr_0 = 2.5957e-04
Loss = 6.7776e-02, PNorm = 67.1437, GNorm = 0.5737, lr_0 = 2.5939e-04
Loss = 5.8894e-02, PNorm = 67.1504, GNorm = 0.5954, lr_0 = 2.5922e-04
Loss = 5.8749e-02, PNorm = 67.1560, GNorm = 0.6909, lr_0 = 2.5904e-04
Loss = 6.1198e-02, PNorm = 67.1613, GNorm = 0.3934, lr_0 = 2.5886e-04
Loss = 6.5497e-02, PNorm = 67.1663, GNorm = 0.8780, lr_0 = 2.5868e-04
Loss = 5.5805e-02, PNorm = 67.1697, GNorm = 0.7907, lr_0 = 2.5851e-04
Loss = 7.0538e-02, PNorm = 67.1756, GNorm = 0.6495, lr_0 = 2.5833e-04
Loss = 6.0680e-02, PNorm = 67.1818, GNorm = 0.6951, lr_0 = 2.5815e-04
Loss = 5.4541e-02, PNorm = 67.1873, GNorm = 0.5302, lr_0 = 2.5797e-04
Loss = 4.9854e-02, PNorm = 67.1926, GNorm = 0.7021, lr_0 = 2.5780e-04
Loss = 5.6542e-02, PNorm = 67.1981, GNorm = 0.8382, lr_0 = 2.5762e-04
Loss = 5.6630e-02, PNorm = 67.2038, GNorm = 0.4910, lr_0 = 2.5745e-04
Loss = 5.8061e-02, PNorm = 67.2092, GNorm = 0.5307, lr_0 = 2.5727e-04
Loss = 5.7183e-02, PNorm = 67.2143, GNorm = 0.6055, lr_0 = 2.5709e-04
Loss = 5.3807e-02, PNorm = 67.2195, GNorm = 0.6283, lr_0 = 2.5692e-04
Loss = 5.4415e-02, PNorm = 67.2231, GNorm = 0.4953, lr_0 = 2.5674e-04
Loss = 5.9789e-02, PNorm = 67.2265, GNorm = 0.7496, lr_0 = 2.5656e-04
Loss = 5.6069e-02, PNorm = 67.2302, GNorm = 0.7217, lr_0 = 2.5639e-04
Loss = 4.8295e-02, PNorm = 67.2346, GNorm = 0.5618, lr_0 = 2.5621e-04
Loss = 5.3077e-02, PNorm = 67.2378, GNorm = 0.6119, lr_0 = 2.5604e-04
Loss = 5.0259e-02, PNorm = 67.2425, GNorm = 0.5294, lr_0 = 2.5586e-04
Loss = 5.7351e-02, PNorm = 67.2499, GNorm = 0.5946, lr_0 = 2.5569e-04
Loss = 5.4752e-02, PNorm = 67.2566, GNorm = 0.4866, lr_0 = 2.5551e-04
Loss = 5.7421e-02, PNorm = 67.2622, GNorm = 1.5783, lr_0 = 2.5534e-04
Loss = 6.3681e-02, PNorm = 67.2660, GNorm = 0.4905, lr_0 = 2.5516e-04
Loss = 5.3613e-02, PNorm = 67.2697, GNorm = 0.6238, lr_0 = 2.5499e-04
Loss = 6.1276e-02, PNorm = 67.2737, GNorm = 0.7055, lr_0 = 2.5481e-04
Loss = 5.3752e-02, PNorm = 67.2797, GNorm = 0.5679, lr_0 = 2.5464e-04
Loss = 6.2194e-02, PNorm = 67.2847, GNorm = 0.5778, lr_0 = 2.5446e-04
Loss = 4.4700e-02, PNorm = 67.2913, GNorm = 0.4047, lr_0 = 2.5429e-04
Loss = 5.2028e-02, PNorm = 67.2947, GNorm = 0.4433, lr_0 = 2.5411e-04
Loss = 5.7494e-02, PNorm = 67.2979, GNorm = 0.5336, lr_0 = 2.5394e-04
Loss = 4.6711e-02, PNorm = 67.3011, GNorm = 0.4176, lr_0 = 2.5377e-04
Loss = 4.9271e-02, PNorm = 67.3024, GNorm = 0.4289, lr_0 = 2.5359e-04
Loss = 5.4486e-02, PNorm = 67.3054, GNorm = 0.4651, lr_0 = 2.5342e-04
Loss = 5.9521e-02, PNorm = 67.3086, GNorm = 0.6024, lr_0 = 2.5325e-04
Loss = 5.1663e-02, PNorm = 67.3121, GNorm = 0.4462, lr_0 = 2.5307e-04
Loss = 5.3130e-02, PNorm = 67.3158, GNorm = 0.6372, lr_0 = 2.5290e-04
Loss = 6.1492e-02, PNorm = 67.3215, GNorm = 0.5453, lr_0 = 2.5273e-04
Loss = 5.9782e-02, PNorm = 67.3267, GNorm = 0.6420, lr_0 = 2.5255e-04
Loss = 5.0004e-02, PNorm = 67.3321, GNorm = 0.5309, lr_0 = 2.5238e-04
Loss = 5.3378e-02, PNorm = 67.3355, GNorm = 0.4999, lr_0 = 2.5221e-04
Loss = 6.9128e-02, PNorm = 67.3397, GNorm = 0.6774, lr_0 = 2.5203e-04
Loss = 4.9861e-02, PNorm = 67.3454, GNorm = 0.7196, lr_0 = 2.5186e-04
Loss = 6.3453e-02, PNorm = 67.3539, GNorm = 0.6380, lr_0 = 2.5169e-04
Loss = 5.9431e-02, PNorm = 67.3592, GNorm = 0.6625, lr_0 = 2.5152e-04
Loss = 5.9371e-02, PNorm = 67.3629, GNorm = 0.4913, lr_0 = 2.5134e-04
Loss = 6.3557e-02, PNorm = 67.3669, GNorm = 0.4596, lr_0 = 2.5117e-04
Loss = 5.4457e-02, PNorm = 67.3723, GNorm = 0.5400, lr_0 = 2.5100e-04
Loss = 4.8621e-02, PNorm = 67.3766, GNorm = 0.6240, lr_0 = 2.5083e-04
Loss = 5.6899e-02, PNorm = 67.3803, GNorm = 0.7306, lr_0 = 2.5066e-04
Loss = 5.8504e-02, PNorm = 67.3840, GNorm = 0.5855, lr_0 = 2.5048e-04
Loss = 5.4528e-02, PNorm = 67.3870, GNorm = 0.5692, lr_0 = 2.5031e-04
Loss = 6.1458e-02, PNorm = 67.3905, GNorm = 0.5610, lr_0 = 2.5014e-04
Loss = 5.4795e-02, PNorm = 67.3942, GNorm = 0.6104, lr_0 = 2.4997e-04
Loss = 5.6775e-02, PNorm = 67.3994, GNorm = 0.5208, lr_0 = 2.4980e-04
Loss = 5.4582e-02, PNorm = 67.4014, GNorm = 0.6650, lr_0 = 2.4963e-04
Loss = 6.0377e-02, PNorm = 67.4028, GNorm = 0.5214, lr_0 = 2.4946e-04
Loss = 5.4840e-02, PNorm = 67.4063, GNorm = 0.4776, lr_0 = 2.4929e-04
Loss = 5.0940e-02, PNorm = 67.4117, GNorm = 0.6070, lr_0 = 2.4911e-04
Loss = 6.0896e-02, PNorm = 67.4175, GNorm = 0.4097, lr_0 = 2.4894e-04
Loss = 5.5117e-02, PNorm = 67.4225, GNorm = 0.5445, lr_0 = 2.4877e-04
Loss = 5.1694e-02, PNorm = 67.4268, GNorm = 0.4109, lr_0 = 2.4860e-04
Loss = 4.9024e-02, PNorm = 67.4315, GNorm = 0.6515, lr_0 = 2.4843e-04
Loss = 5.0614e-02, PNorm = 67.4339, GNorm = 0.8501, lr_0 = 2.4826e-04
Loss = 5.9043e-02, PNorm = 67.4386, GNorm = 0.4920, lr_0 = 2.4809e-04
Loss = 5.3736e-02, PNorm = 67.4434, GNorm = 0.4677, lr_0 = 2.4792e-04
Loss = 4.8871e-02, PNorm = 67.4479, GNorm = 0.4076, lr_0 = 2.4775e-04
Loss = 5.5301e-02, PNorm = 67.4519, GNorm = 0.7728, lr_0 = 2.4758e-04
Loss = 5.4917e-02, PNorm = 67.4564, GNorm = 0.8779, lr_0 = 2.4741e-04
Loss = 5.3394e-02, PNorm = 67.4612, GNorm = 0.8781, lr_0 = 2.4724e-04
Loss = 5.9877e-02, PNorm = 67.4675, GNorm = 0.6871, lr_0 = 2.4707e-04
Validation mae = 0.390338
Epoch 19
Loss = 5.1532e-02, PNorm = 67.4721, GNorm = 0.6936, lr_0 = 2.4690e-04
Loss = 4.9129e-02, PNorm = 67.4782, GNorm = 0.5382, lr_0 = 2.4674e-04
Loss = 5.4096e-02, PNorm = 67.4832, GNorm = 0.4242, lr_0 = 2.4657e-04
Loss = 5.5019e-02, PNorm = 67.4895, GNorm = 0.5536, lr_0 = 2.4640e-04
Loss = 5.4053e-02, PNorm = 67.4940, GNorm = 0.5768, lr_0 = 2.4623e-04
Loss = 5.0221e-02, PNorm = 67.4993, GNorm = 0.5243, lr_0 = 2.4606e-04
Loss = 5.6271e-02, PNorm = 67.5051, GNorm = 0.5729, lr_0 = 2.4589e-04
Loss = 5.2552e-02, PNorm = 67.5094, GNorm = 0.7100, lr_0 = 2.4572e-04
Loss = 5.3114e-02, PNorm = 67.5128, GNorm = 0.4852, lr_0 = 2.4556e-04
Loss = 5.2404e-02, PNorm = 67.5191, GNorm = 0.5099, lr_0 = 2.4539e-04
Loss = 4.2883e-02, PNorm = 67.5261, GNorm = 0.4716, lr_0 = 2.4522e-04
Loss = 4.5662e-02, PNorm = 67.5304, GNorm = 0.5514, lr_0 = 2.4505e-04
Loss = 4.7506e-02, PNorm = 67.5312, GNorm = 0.5355, lr_0 = 2.4488e-04
Loss = 5.2744e-02, PNorm = 67.5334, GNorm = 0.4607, lr_0 = 2.4472e-04
Loss = 5.1791e-02, PNorm = 67.5376, GNorm = 0.7031, lr_0 = 2.4455e-04
Loss = 4.2339e-02, PNorm = 67.5412, GNorm = 0.4015, lr_0 = 2.4438e-04
Loss = 4.7504e-02, PNorm = 67.5458, GNorm = 0.5155, lr_0 = 2.4421e-04
Loss = 5.2879e-02, PNorm = 67.5501, GNorm = 0.4923, lr_0 = 2.4405e-04
Loss = 5.3508e-02, PNorm = 67.5553, GNorm = 0.5760, lr_0 = 2.4388e-04
Loss = 4.6142e-02, PNorm = 67.5613, GNorm = 0.5203, lr_0 = 2.4371e-04
Loss = 4.9031e-02, PNorm = 67.5665, GNorm = 0.6012, lr_0 = 2.4354e-04
Loss = 4.8051e-02, PNorm = 67.5722, GNorm = 0.7167, lr_0 = 2.4338e-04
Loss = 4.8713e-02, PNorm = 67.5783, GNorm = 0.7648, lr_0 = 2.4321e-04
Loss = 4.9088e-02, PNorm = 67.5831, GNorm = 0.7023, lr_0 = 2.4304e-04
Loss = 5.4132e-02, PNorm = 67.5882, GNorm = 0.4735, lr_0 = 2.4288e-04
Loss = 5.5888e-02, PNorm = 67.5910, GNorm = 0.7025, lr_0 = 2.4271e-04
Loss = 5.4454e-02, PNorm = 67.5968, GNorm = 0.5760, lr_0 = 2.4254e-04
Loss = 5.7783e-02, PNorm = 67.6020, GNorm = 0.5751, lr_0 = 2.4238e-04
Loss = 4.9302e-02, PNorm = 67.6065, GNorm = 0.4982, lr_0 = 2.4221e-04
Loss = 5.2967e-02, PNorm = 67.6105, GNorm = 0.5741, lr_0 = 2.4205e-04
Loss = 4.9528e-02, PNorm = 67.6148, GNorm = 0.5369, lr_0 = 2.4188e-04
Loss = 6.4103e-02, PNorm = 67.6226, GNorm = 0.5950, lr_0 = 2.4171e-04
Loss = 5.3659e-02, PNorm = 67.6282, GNorm = 0.4827, lr_0 = 2.4155e-04
Loss = 5.4255e-02, PNorm = 67.6319, GNorm = 0.6950, lr_0 = 2.4138e-04
Loss = 6.2976e-02, PNorm = 67.6353, GNorm = 0.4790, lr_0 = 2.4122e-04
Loss = 5.5991e-02, PNorm = 67.6381, GNorm = 1.1448, lr_0 = 2.4105e-04
Loss = 5.5618e-02, PNorm = 67.6410, GNorm = 0.9160, lr_0 = 2.4089e-04
Loss = 4.8745e-02, PNorm = 67.6466, GNorm = 0.5615, lr_0 = 2.4072e-04
Loss = 6.5124e-02, PNorm = 67.6513, GNorm = 0.6277, lr_0 = 2.4056e-04
Loss = 4.9552e-02, PNorm = 67.6549, GNorm = 0.7039, lr_0 = 2.4039e-04
Loss = 5.5059e-02, PNorm = 67.6606, GNorm = 0.6839, lr_0 = 2.4023e-04
Loss = 5.6668e-02, PNorm = 67.6662, GNorm = 0.6123, lr_0 = 2.4006e-04
Loss = 5.9966e-02, PNorm = 67.6732, GNorm = 0.4991, lr_0 = 2.3990e-04
Loss = 6.0443e-02, PNorm = 67.6780, GNorm = 0.8826, lr_0 = 2.3974e-04
Loss = 4.7289e-02, PNorm = 67.6837, GNorm = 0.4597, lr_0 = 2.3957e-04
Loss = 4.5033e-02, PNorm = 67.6882, GNorm = 0.8885, lr_0 = 2.3941e-04
Loss = 4.4320e-02, PNorm = 67.6928, GNorm = 0.8882, lr_0 = 2.3924e-04
Loss = 5.2412e-02, PNorm = 67.6966, GNorm = 0.5282, lr_0 = 2.3908e-04
Loss = 4.8834e-02, PNorm = 67.7003, GNorm = 0.5002, lr_0 = 2.3892e-04
Loss = 4.5884e-02, PNorm = 67.7034, GNorm = 0.6838, lr_0 = 2.3875e-04
Loss = 4.9150e-02, PNorm = 67.7084, GNorm = 0.8339, lr_0 = 2.3859e-04
Loss = 4.6431e-02, PNorm = 67.7129, GNorm = 0.8065, lr_0 = 2.3842e-04
Loss = 5.5463e-02, PNorm = 67.7164, GNorm = 0.5311, lr_0 = 2.3826e-04
Loss = 5.2090e-02, PNorm = 67.7205, GNorm = 0.3857, lr_0 = 2.3810e-04
Loss = 4.8960e-02, PNorm = 67.7253, GNorm = 0.5941, lr_0 = 2.3794e-04
Loss = 4.9132e-02, PNorm = 67.7292, GNorm = 0.5474, lr_0 = 2.3777e-04
Loss = 4.6106e-02, PNorm = 67.7319, GNorm = 0.4945, lr_0 = 2.3761e-04
Loss = 5.1550e-02, PNorm = 67.7359, GNorm = 0.6230, lr_0 = 2.3745e-04
Loss = 5.0052e-02, PNorm = 67.7395, GNorm = 0.5655, lr_0 = 2.3728e-04
Loss = 5.1639e-02, PNorm = 67.7430, GNorm = 0.4594, lr_0 = 2.3712e-04
Loss = 4.5404e-02, PNorm = 67.7483, GNorm = 0.4171, lr_0 = 2.3696e-04
Loss = 4.7492e-02, PNorm = 67.7499, GNorm = 0.4984, lr_0 = 2.3680e-04
Loss = 5.2786e-02, PNorm = 67.7529, GNorm = 0.4436, lr_0 = 2.3663e-04
Loss = 5.4203e-02, PNorm = 67.7559, GNorm = 0.7276, lr_0 = 2.3647e-04
Loss = 5.7711e-02, PNorm = 67.7604, GNorm = 0.6180, lr_0 = 2.3631e-04
Loss = 5.2574e-02, PNorm = 67.7649, GNorm = 0.4653, lr_0 = 2.3615e-04
Loss = 5.8979e-02, PNorm = 67.7673, GNorm = 0.5368, lr_0 = 2.3599e-04
Loss = 5.5412e-02, PNorm = 67.7699, GNorm = 0.5916, lr_0 = 2.3582e-04
Loss = 5.8338e-02, PNorm = 67.7743, GNorm = 0.5141, lr_0 = 2.3566e-04
Loss = 5.7741e-02, PNorm = 67.7783, GNorm = 0.6548, lr_0 = 2.3550e-04
Loss = 4.9331e-02, PNorm = 67.7832, GNorm = 0.4678, lr_0 = 2.3534e-04
Loss = 5.2772e-02, PNorm = 67.7867, GNorm = 0.6442, lr_0 = 2.3518e-04
Loss = 5.2812e-02, PNorm = 67.7897, GNorm = 0.6485, lr_0 = 2.3502e-04
Loss = 6.3685e-02, PNorm = 67.7941, GNorm = 0.6587, lr_0 = 2.3486e-04
Loss = 5.7012e-02, PNorm = 67.8008, GNorm = 0.7542, lr_0 = 2.3470e-04
Loss = 6.1312e-02, PNorm = 67.8056, GNorm = 0.8642, lr_0 = 2.3454e-04
Loss = 4.6659e-02, PNorm = 67.8091, GNorm = 0.4709, lr_0 = 2.3437e-04
Loss = 5.5093e-02, PNorm = 67.8140, GNorm = 0.4968, lr_0 = 2.3421e-04
Loss = 5.6741e-02, PNorm = 67.8182, GNorm = 0.5659, lr_0 = 2.3405e-04
Loss = 4.4531e-02, PNorm = 67.8236, GNorm = 0.4237, lr_0 = 2.3389e-04
Loss = 4.8024e-02, PNorm = 67.8287, GNorm = 0.4711, lr_0 = 2.3373e-04
Loss = 4.9796e-02, PNorm = 67.8327, GNorm = 0.7448, lr_0 = 2.3357e-04
Loss = 5.2669e-02, PNorm = 67.8348, GNorm = 0.6164, lr_0 = 2.3341e-04
Loss = 4.4450e-02, PNorm = 67.8378, GNorm = 0.4738, lr_0 = 2.3325e-04
Loss = 5.5337e-02, PNorm = 67.8410, GNorm = 0.4585, lr_0 = 2.3309e-04
Loss = 5.1895e-02, PNorm = 67.8443, GNorm = 0.6487, lr_0 = 2.3293e-04
Loss = 4.4062e-02, PNorm = 67.8478, GNorm = 0.3676, lr_0 = 2.3277e-04
Loss = 4.3646e-02, PNorm = 67.8515, GNorm = 0.4573, lr_0 = 2.3261e-04
Loss = 5.1615e-02, PNorm = 67.8552, GNorm = 0.5682, lr_0 = 2.3246e-04
Loss = 4.7579e-02, PNorm = 67.8591, GNorm = 0.5847, lr_0 = 2.3230e-04
Loss = 5.3845e-02, PNorm = 67.8627, GNorm = 0.5023, lr_0 = 2.3214e-04
Loss = 4.9033e-02, PNorm = 67.8675, GNorm = 0.5392, lr_0 = 2.3198e-04
Loss = 4.9376e-02, PNorm = 67.8713, GNorm = 0.7725, lr_0 = 2.3182e-04
Loss = 4.7843e-02, PNorm = 67.8760, GNorm = 0.3931, lr_0 = 2.3166e-04
Loss = 5.3733e-02, PNorm = 67.8776, GNorm = 0.4323, lr_0 = 2.3150e-04
Loss = 5.0651e-02, PNorm = 67.8809, GNorm = 0.5160, lr_0 = 2.3134e-04
Loss = 4.7490e-02, PNorm = 67.8856, GNorm = 0.4406, lr_0 = 2.3118e-04
Loss = 4.8986e-02, PNorm = 67.8900, GNorm = 0.5979, lr_0 = 2.3103e-04
Loss = 5.3971e-02, PNorm = 67.8934, GNorm = 0.6740, lr_0 = 2.3087e-04
Loss = 5.5615e-02, PNorm = 67.8988, GNorm = 0.3823, lr_0 = 2.3071e-04
Loss = 5.6819e-02, PNorm = 67.9021, GNorm = 0.6271, lr_0 = 2.3055e-04
Loss = 5.4201e-02, PNorm = 67.9058, GNorm = 0.7013, lr_0 = 2.3039e-04
Loss = 4.6899e-02, PNorm = 67.9087, GNorm = 0.6573, lr_0 = 2.3024e-04
Loss = 5.2068e-02, PNorm = 67.9106, GNorm = 0.8233, lr_0 = 2.3008e-04
Loss = 4.9502e-02, PNorm = 67.9139, GNorm = 0.4993, lr_0 = 2.2992e-04
Loss = 4.7608e-02, PNorm = 67.9176, GNorm = 0.6010, lr_0 = 2.2976e-04
Loss = 5.9982e-02, PNorm = 67.9224, GNorm = 0.7547, lr_0 = 2.2961e-04
Loss = 4.8078e-02, PNorm = 67.9269, GNorm = 0.7807, lr_0 = 2.2945e-04
Loss = 4.8357e-02, PNorm = 67.9293, GNorm = 0.4640, lr_0 = 2.2929e-04
Loss = 4.0311e-02, PNorm = 67.9308, GNorm = 0.5080, lr_0 = 2.2913e-04
Loss = 5.5943e-02, PNorm = 67.9328, GNorm = 0.6784, lr_0 = 2.2898e-04
Loss = 4.6510e-02, PNorm = 67.9350, GNorm = 0.3757, lr_0 = 2.2882e-04
Loss = 4.8804e-02, PNorm = 67.9381, GNorm = 0.8035, lr_0 = 2.2866e-04
Loss = 4.8674e-02, PNorm = 67.9412, GNorm = 0.9584, lr_0 = 2.2851e-04
Loss = 4.9143e-02, PNorm = 67.9434, GNorm = 0.4722, lr_0 = 2.2835e-04
Loss = 4.6253e-02, PNorm = 67.9479, GNorm = 0.4899, lr_0 = 2.2819e-04
Loss = 4.9056e-02, PNorm = 67.9518, GNorm = 0.5016, lr_0 = 2.2804e-04
Loss = 5.2696e-02, PNorm = 67.9545, GNorm = 0.6386, lr_0 = 2.2788e-04
Loss = 4.9434e-02, PNorm = 67.9571, GNorm = 0.6337, lr_0 = 2.2773e-04
Loss = 4.6201e-02, PNorm = 67.9609, GNorm = 0.6061, lr_0 = 2.2757e-04
Validation mae = 0.393250
Epoch 20
Loss = 4.5144e-02, PNorm = 67.9647, GNorm = 0.7513, lr_0 = 2.2741e-04
Loss = 4.6706e-02, PNorm = 67.9674, GNorm = 0.6085, lr_0 = 2.2726e-04
Loss = 4.3963e-02, PNorm = 67.9716, GNorm = 0.5779, lr_0 = 2.2710e-04
Loss = 4.2510e-02, PNorm = 67.9757, GNorm = 0.5434, lr_0 = 2.2695e-04
Loss = 4.2272e-02, PNorm = 67.9809, GNorm = 0.4163, lr_0 = 2.2679e-04
Loss = 4.6119e-02, PNorm = 67.9848, GNorm = 0.4847, lr_0 = 2.2664e-04
Loss = 4.6521e-02, PNorm = 67.9904, GNorm = 0.7719, lr_0 = 2.2648e-04
Loss = 4.6154e-02, PNorm = 67.9970, GNorm = 0.3966, lr_0 = 2.2632e-04
Loss = 4.2660e-02, PNorm = 68.0006, GNorm = 0.4323, lr_0 = 2.2617e-04
Loss = 6.1631e-02, PNorm = 68.0021, GNorm = 0.9663, lr_0 = 2.2601e-04
Loss = 4.5862e-02, PNorm = 68.0055, GNorm = 0.6902, lr_0 = 2.2586e-04
Loss = 5.3476e-02, PNorm = 68.0109, GNorm = 0.5225, lr_0 = 2.2571e-04
Loss = 4.6192e-02, PNorm = 68.0162, GNorm = 0.6401, lr_0 = 2.2555e-04
Loss = 5.6035e-02, PNorm = 68.0204, GNorm = 0.6615, lr_0 = 2.2540e-04
Loss = 4.6576e-02, PNorm = 68.0250, GNorm = 0.7526, lr_0 = 2.2524e-04
Loss = 4.3828e-02, PNorm = 68.0288, GNorm = 0.4404, lr_0 = 2.2509e-04
Loss = 4.8886e-02, PNorm = 68.0319, GNorm = 1.0928, lr_0 = 2.2493e-04
Loss = 4.3724e-02, PNorm = 68.0365, GNorm = 0.4581, lr_0 = 2.2478e-04
Loss = 4.7494e-02, PNorm = 68.0411, GNorm = 0.4352, lr_0 = 2.2463e-04
Loss = 3.5438e-02, PNorm = 68.0442, GNorm = 0.4581, lr_0 = 2.2447e-04
Loss = 4.4753e-02, PNorm = 68.0472, GNorm = 0.6199, lr_0 = 2.2432e-04
Loss = 5.3913e-02, PNorm = 68.0496, GNorm = 0.9294, lr_0 = 2.2416e-04
Loss = 4.0966e-02, PNorm = 68.0514, GNorm = 0.5179, lr_0 = 2.2401e-04
Loss = 4.7037e-02, PNorm = 68.0554, GNorm = 0.5842, lr_0 = 2.2386e-04
Loss = 4.1580e-02, PNorm = 68.0589, GNorm = 0.7059, lr_0 = 2.2370e-04
Loss = 4.4291e-02, PNorm = 68.0621, GNorm = 0.5366, lr_0 = 2.2355e-04
Loss = 4.5734e-02, PNorm = 68.0666, GNorm = 0.4497, lr_0 = 2.2340e-04
Loss = 4.5739e-02, PNorm = 68.0710, GNorm = 0.3600, lr_0 = 2.2324e-04
Loss = 4.8165e-02, PNorm = 68.0747, GNorm = 0.5648, lr_0 = 2.2309e-04
Loss = 4.1864e-02, PNorm = 68.0787, GNorm = 0.5055, lr_0 = 2.2294e-04
Loss = 4.9129e-02, PNorm = 68.0844, GNorm = 0.5505, lr_0 = 2.2279e-04
Loss = 4.8494e-02, PNorm = 68.0886, GNorm = 0.6265, lr_0 = 2.2263e-04
Loss = 5.5024e-02, PNorm = 68.0923, GNorm = 0.5668, lr_0 = 2.2248e-04
Loss = 4.8535e-02, PNorm = 68.0982, GNorm = 0.5168, lr_0 = 2.2233e-04
Loss = 4.7782e-02, PNorm = 68.1038, GNorm = 0.4640, lr_0 = 2.2218e-04
Loss = 4.5445e-02, PNorm = 68.1070, GNorm = 0.4078, lr_0 = 2.2202e-04
Loss = 4.7041e-02, PNorm = 68.1120, GNorm = 0.5997, lr_0 = 2.2187e-04
Loss = 3.8551e-02, PNorm = 68.1145, GNorm = 0.5579, lr_0 = 2.2172e-04
Loss = 4.8431e-02, PNorm = 68.1173, GNorm = 0.5414, lr_0 = 2.2157e-04
Loss = 4.3920e-02, PNorm = 68.1206, GNorm = 0.5328, lr_0 = 2.2142e-04
Loss = 4.2912e-02, PNorm = 68.1260, GNorm = 0.7316, lr_0 = 2.2126e-04
Loss = 5.6296e-02, PNorm = 68.1308, GNorm = 0.9611, lr_0 = 2.2111e-04
Loss = 5.1260e-02, PNorm = 68.1353, GNorm = 0.5603, lr_0 = 2.2096e-04
Loss = 5.3267e-02, PNorm = 68.1394, GNorm = 0.5019, lr_0 = 2.2081e-04
Loss = 4.2552e-02, PNorm = 68.1431, GNorm = 0.5191, lr_0 = 2.2066e-04
Loss = 3.9114e-02, PNorm = 68.1472, GNorm = 0.5942, lr_0 = 2.2051e-04
Loss = 4.9990e-02, PNorm = 68.1523, GNorm = 0.5159, lr_0 = 2.2036e-04
Loss = 4.3340e-02, PNorm = 68.1572, GNorm = 0.4690, lr_0 = 2.2021e-04
Loss = 4.4460e-02, PNorm = 68.1603, GNorm = 0.4404, lr_0 = 2.2005e-04
Loss = 4.7338e-02, PNorm = 68.1643, GNorm = 0.3858, lr_0 = 2.1990e-04
Loss = 4.9262e-02, PNorm = 68.1671, GNorm = 0.4905, lr_0 = 2.1975e-04
Loss = 4.6633e-02, PNorm = 68.1697, GNorm = 0.4276, lr_0 = 2.1960e-04
Loss = 5.1334e-02, PNorm = 68.1756, GNorm = 0.6048, lr_0 = 2.1945e-04
Loss = 4.5913e-02, PNorm = 68.1825, GNorm = 0.5235, lr_0 = 2.1930e-04
Loss = 4.7251e-02, PNorm = 68.1873, GNorm = 0.5078, lr_0 = 2.1915e-04
Loss = 5.8770e-02, PNorm = 68.1886, GNorm = 0.8683, lr_0 = 2.1900e-04
Loss = 5.1251e-02, PNorm = 68.1924, GNorm = 0.7270, lr_0 = 2.1885e-04
Loss = 5.2841e-02, PNorm = 68.1965, GNorm = 0.5601, lr_0 = 2.1870e-04
Loss = 4.9058e-02, PNorm = 68.2000, GNorm = 0.5590, lr_0 = 2.1855e-04
Loss = 4.6718e-02, PNorm = 68.2022, GNorm = 0.7170, lr_0 = 2.1840e-04
Loss = 4.8312e-02, PNorm = 68.2075, GNorm = 0.5611, lr_0 = 2.1825e-04
Loss = 4.8372e-02, PNorm = 68.2116, GNorm = 0.5146, lr_0 = 2.1810e-04
Loss = 4.9070e-02, PNorm = 68.2130, GNorm = 0.4973, lr_0 = 2.1795e-04
Loss = 4.1144e-02, PNorm = 68.2154, GNorm = 0.3954, lr_0 = 2.1780e-04
Loss = 4.6751e-02, PNorm = 68.2171, GNorm = 0.4607, lr_0 = 2.1765e-04
Loss = 5.3274e-02, PNorm = 68.2223, GNorm = 0.4276, lr_0 = 2.1751e-04
Loss = 5.3837e-02, PNorm = 68.2264, GNorm = 0.5262, lr_0 = 2.1736e-04
Loss = 4.6019e-02, PNorm = 68.2290, GNorm = 0.5018, lr_0 = 2.1721e-04
Loss = 4.6293e-02, PNorm = 68.2336, GNorm = 0.4326, lr_0 = 2.1706e-04
Loss = 5.2423e-02, PNorm = 68.2384, GNorm = 0.4511, lr_0 = 2.1691e-04
Loss = 5.5225e-02, PNorm = 68.2423, GNorm = 0.5626, lr_0 = 2.1676e-04
Loss = 4.5852e-02, PNorm = 68.2461, GNorm = 0.7524, lr_0 = 2.1661e-04
Loss = 5.1525e-02, PNorm = 68.2489, GNorm = 0.6065, lr_0 = 2.1646e-04
Loss = 4.8048e-02, PNorm = 68.2511, GNorm = 0.6161, lr_0 = 2.1632e-04
Loss = 4.3898e-02, PNorm = 68.2548, GNorm = 0.4927, lr_0 = 2.1617e-04
Loss = 4.5053e-02, PNorm = 68.2588, GNorm = 0.4990, lr_0 = 2.1602e-04
Loss = 5.2850e-02, PNorm = 68.2628, GNorm = 0.5652, lr_0 = 2.1587e-04
Loss = 4.4968e-02, PNorm = 68.2665, GNorm = 0.3630, lr_0 = 2.1572e-04
Loss = 4.4209e-02, PNorm = 68.2711, GNorm = 0.6166, lr_0 = 2.1558e-04
Loss = 4.7877e-02, PNorm = 68.2742, GNorm = 0.3762, lr_0 = 2.1543e-04
Loss = 5.4143e-02, PNorm = 68.2763, GNorm = 0.8356, lr_0 = 2.1528e-04
Loss = 4.7294e-02, PNorm = 68.2792, GNorm = 0.5029, lr_0 = 2.1513e-04
Loss = 4.6074e-02, PNorm = 68.2822, GNorm = 0.4089, lr_0 = 2.1499e-04
Loss = 4.7004e-02, PNorm = 68.2869, GNorm = 0.6174, lr_0 = 2.1484e-04
Loss = 5.9862e-02, PNorm = 68.2896, GNorm = 0.5944, lr_0 = 2.1469e-04
Loss = 4.3774e-02, PNorm = 68.2938, GNorm = 0.3871, lr_0 = 2.1454e-04
Loss = 4.5111e-02, PNorm = 68.2968, GNorm = 0.6609, lr_0 = 2.1440e-04
Loss = 4.5682e-02, PNorm = 68.2996, GNorm = 0.4797, lr_0 = 2.1425e-04
Loss = 4.8982e-02, PNorm = 68.3034, GNorm = 0.4029, lr_0 = 2.1410e-04
Loss = 4.2924e-02, PNorm = 68.3062, GNorm = 0.6816, lr_0 = 2.1396e-04
Loss = 4.7447e-02, PNorm = 68.3090, GNorm = 0.4080, lr_0 = 2.1381e-04
Loss = 5.7316e-02, PNorm = 68.3132, GNorm = 0.5516, lr_0 = 2.1366e-04
Loss = 3.9502e-02, PNorm = 68.3170, GNorm = 0.6739, lr_0 = 2.1352e-04
Loss = 5.2321e-02, PNorm = 68.3201, GNorm = 0.6716, lr_0 = 2.1337e-04
Loss = 4.4007e-02, PNorm = 68.3247, GNorm = 0.5435, lr_0 = 2.1323e-04
Loss = 5.0980e-02, PNorm = 68.3266, GNorm = 0.6095, lr_0 = 2.1308e-04
Loss = 5.0169e-02, PNorm = 68.3297, GNorm = 0.5925, lr_0 = 2.1293e-04
Loss = 5.1615e-02, PNorm = 68.3342, GNorm = 0.8097, lr_0 = 2.1279e-04
Loss = 5.4030e-02, PNorm = 68.3405, GNorm = 0.9778, lr_0 = 2.1264e-04
Loss = 5.0614e-02, PNorm = 68.3412, GNorm = 0.6118, lr_0 = 2.1250e-04
Loss = 4.9850e-02, PNorm = 68.3442, GNorm = 0.5365, lr_0 = 2.1235e-04
Loss = 4.8800e-02, PNorm = 68.3469, GNorm = 0.6475, lr_0 = 2.1221e-04
Loss = 4.5883e-02, PNorm = 68.3493, GNorm = 0.4784, lr_0 = 2.1206e-04
Loss = 5.3367e-02, PNorm = 68.3510, GNorm = 0.4360, lr_0 = 2.1191e-04
Loss = 6.3066e-02, PNorm = 68.3544, GNorm = 0.4466, lr_0 = 2.1177e-04
Loss = 5.1471e-02, PNorm = 68.3581, GNorm = 0.5257, lr_0 = 2.1162e-04
Loss = 4.6361e-02, PNorm = 68.3618, GNorm = 0.8495, lr_0 = 2.1148e-04
Loss = 5.4305e-02, PNorm = 68.3647, GNorm = 0.9623, lr_0 = 2.1133e-04
Loss = 4.7021e-02, PNorm = 68.3671, GNorm = 0.5505, lr_0 = 2.1119e-04
Loss = 4.7862e-02, PNorm = 68.3689, GNorm = 0.5536, lr_0 = 2.1104e-04
Loss = 5.9089e-02, PNorm = 68.3731, GNorm = 0.5865, lr_0 = 2.1090e-04
Loss = 6.3586e-02, PNorm = 68.3789, GNorm = 0.8601, lr_0 = 2.1076e-04
Loss = 5.0634e-02, PNorm = 68.3844, GNorm = 0.4183, lr_0 = 2.1061e-04
Loss = 4.9982e-02, PNorm = 68.3879, GNorm = 0.5612, lr_0 = 2.1047e-04
Loss = 4.9501e-02, PNorm = 68.3926, GNorm = 1.1397, lr_0 = 2.1032e-04
Loss = 4.9105e-02, PNorm = 68.3957, GNorm = 0.5157, lr_0 = 2.1018e-04
Loss = 5.4603e-02, PNorm = 68.3994, GNorm = 0.5961, lr_0 = 2.1003e-04
Loss = 5.3521e-02, PNorm = 68.4054, GNorm = 0.7474, lr_0 = 2.0989e-04
Loss = 4.6923e-02, PNorm = 68.4078, GNorm = 0.4931, lr_0 = 2.0975e-04
Loss = 4.9840e-02, PNorm = 68.4100, GNorm = 0.8158, lr_0 = 2.0960e-04
Validation mae = 0.386771
Epoch 21
Loss = 3.8291e-02, PNorm = 68.4133, GNorm = 0.3153, lr_0 = 2.0946e-04
Loss = 4.3100e-02, PNorm = 68.4173, GNorm = 0.5234, lr_0 = 2.0932e-04
Loss = 4.4459e-02, PNorm = 68.4205, GNorm = 0.5127, lr_0 = 2.0917e-04
Loss = 4.1227e-02, PNorm = 68.4245, GNorm = 0.3818, lr_0 = 2.0903e-04
Loss = 4.2403e-02, PNorm = 68.4288, GNorm = 0.5366, lr_0 = 2.0889e-04
Loss = 4.0519e-02, PNorm = 68.4317, GNorm = 0.4885, lr_0 = 2.0874e-04
Loss = 4.5730e-02, PNorm = 68.4362, GNorm = 0.5033, lr_0 = 2.0860e-04
Loss = 3.6339e-02, PNorm = 68.4394, GNorm = 0.3276, lr_0 = 2.0846e-04
Loss = 4.5399e-02, PNorm = 68.4424, GNorm = 0.3651, lr_0 = 2.0831e-04
Loss = 3.9139e-02, PNorm = 68.4458, GNorm = 0.5186, lr_0 = 2.0817e-04
Loss = 4.4905e-02, PNorm = 68.4509, GNorm = 0.4237, lr_0 = 2.0803e-04
Loss = 4.5485e-02, PNorm = 68.4557, GNorm = 0.4949, lr_0 = 2.0789e-04
Loss = 4.1817e-02, PNorm = 68.4584, GNorm = 0.5569, lr_0 = 2.0774e-04
Loss = 4.0614e-02, PNorm = 68.4623, GNorm = 0.4364, lr_0 = 2.0760e-04
Loss = 3.6261e-02, PNorm = 68.4651, GNorm = 0.6244, lr_0 = 2.0746e-04
Loss = 5.1513e-02, PNorm = 68.4676, GNorm = 0.4089, lr_0 = 2.0732e-04
Loss = 4.4572e-02, PNorm = 68.4703, GNorm = 0.6811, lr_0 = 2.0718e-04
Loss = 4.6018e-02, PNorm = 68.4734, GNorm = 0.5325, lr_0 = 2.0703e-04
Loss = 4.2668e-02, PNorm = 68.4780, GNorm = 0.5862, lr_0 = 2.0689e-04
Loss = 4.1686e-02, PNorm = 68.4821, GNorm = 0.4472, lr_0 = 2.0675e-04
Loss = 4.7050e-02, PNorm = 68.4857, GNorm = 0.5419, lr_0 = 2.0661e-04
Loss = 4.2223e-02, PNorm = 68.4894, GNorm = 0.4633, lr_0 = 2.0647e-04
Loss = 4.4408e-02, PNorm = 68.4937, GNorm = 0.4582, lr_0 = 2.0633e-04
Loss = 4.0489e-02, PNorm = 68.4967, GNorm = 0.4180, lr_0 = 2.0618e-04
Loss = 4.3939e-02, PNorm = 68.4992, GNorm = 0.6565, lr_0 = 2.0604e-04
Loss = 4.4639e-02, PNorm = 68.5005, GNorm = 0.4221, lr_0 = 2.0590e-04
Loss = 3.6361e-02, PNorm = 68.5036, GNorm = 0.4723, lr_0 = 2.0576e-04
Loss = 4.4606e-02, PNorm = 68.5061, GNorm = 0.3689, lr_0 = 2.0562e-04
Loss = 4.5668e-02, PNorm = 68.5096, GNorm = 0.6935, lr_0 = 2.0548e-04
Loss = 4.3631e-02, PNorm = 68.5130, GNorm = 0.4908, lr_0 = 2.0534e-04
Loss = 4.9677e-02, PNorm = 68.5168, GNorm = 0.4788, lr_0 = 2.0520e-04
Loss = 4.3386e-02, PNorm = 68.5211, GNorm = 0.4369, lr_0 = 2.0506e-04
Loss = 3.8890e-02, PNorm = 68.5241, GNorm = 0.3591, lr_0 = 2.0492e-04
Loss = 4.4009e-02, PNorm = 68.5269, GNorm = 0.7004, lr_0 = 2.0478e-04
Loss = 5.4417e-02, PNorm = 68.5301, GNorm = 0.6241, lr_0 = 2.0464e-04
Loss = 4.7005e-02, PNorm = 68.5345, GNorm = 0.5704, lr_0 = 2.0450e-04
Loss = 5.3399e-02, PNorm = 68.5404, GNorm = 0.6090, lr_0 = 2.0436e-04
Loss = 5.2471e-02, PNorm = 68.5440, GNorm = 0.6174, lr_0 = 2.0422e-04
Loss = 4.0279e-02, PNorm = 68.5462, GNorm = 0.5165, lr_0 = 2.0408e-04
Loss = 4.8351e-02, PNorm = 68.5477, GNorm = 0.6479, lr_0 = 2.0394e-04
Loss = 4.7010e-02, PNorm = 68.5506, GNorm = 0.4886, lr_0 = 2.0380e-04
Loss = 4.3896e-02, PNorm = 68.5568, GNorm = 0.6747, lr_0 = 2.0366e-04
Loss = 4.2486e-02, PNorm = 68.5589, GNorm = 0.6054, lr_0 = 2.0352e-04
Loss = 4.8838e-02, PNorm = 68.5608, GNorm = 0.6344, lr_0 = 2.0338e-04
Loss = 4.2879e-02, PNorm = 68.5642, GNorm = 0.5707, lr_0 = 2.0324e-04
Loss = 4.4058e-02, PNorm = 68.5679, GNorm = 0.5308, lr_0 = 2.0310e-04
Loss = 3.7323e-02, PNorm = 68.5704, GNorm = 0.5081, lr_0 = 2.0296e-04
Loss = 3.9977e-02, PNorm = 68.5735, GNorm = 0.3569, lr_0 = 2.0282e-04
Loss = 4.6697e-02, PNorm = 68.5779, GNorm = 0.4890, lr_0 = 2.0268e-04
Loss = 4.2418e-02, PNorm = 68.5794, GNorm = 0.5785, lr_0 = 2.0254e-04
Loss = 4.9761e-02, PNorm = 68.5828, GNorm = 0.4409, lr_0 = 2.0240e-04
Loss = 5.3018e-02, PNorm = 68.5871, GNorm = 0.4967, lr_0 = 2.0227e-04
Loss = 4.6150e-02, PNorm = 68.5916, GNorm = 0.4163, lr_0 = 2.0213e-04
Loss = 4.0965e-02, PNorm = 68.5955, GNorm = 0.4417, lr_0 = 2.0199e-04
Loss = 4.7134e-02, PNorm = 68.5995, GNorm = 0.5634, lr_0 = 2.0185e-04
Loss = 4.6027e-02, PNorm = 68.6041, GNorm = 0.4969, lr_0 = 2.0171e-04
Loss = 6.0696e-02, PNorm = 68.6070, GNorm = 0.6417, lr_0 = 2.0157e-04
Loss = 4.8589e-02, PNorm = 68.6100, GNorm = 0.4004, lr_0 = 2.0144e-04
Loss = 4.9374e-02, PNorm = 68.6143, GNorm = 0.7363, lr_0 = 2.0130e-04
Loss = 4.3684e-02, PNorm = 68.6175, GNorm = 0.4850, lr_0 = 2.0116e-04
Loss = 4.7078e-02, PNorm = 68.6207, GNorm = 0.5684, lr_0 = 2.0102e-04
Loss = 3.9308e-02, PNorm = 68.6235, GNorm = 0.4971, lr_0 = 2.0088e-04
Loss = 4.2636e-02, PNorm = 68.6244, GNorm = 0.9220, lr_0 = 2.0075e-04
Loss = 4.2206e-02, PNorm = 68.6266, GNorm = 0.7048, lr_0 = 2.0061e-04
Loss = 4.8952e-02, PNorm = 68.6299, GNorm = 0.7763, lr_0 = 2.0047e-04
Loss = 4.5343e-02, PNorm = 68.6330, GNorm = 0.5431, lr_0 = 2.0033e-04
Loss = 4.9859e-02, PNorm = 68.6363, GNorm = 0.4698, lr_0 = 2.0020e-04
Loss = 5.0925e-02, PNorm = 68.6409, GNorm = 0.4580, lr_0 = 2.0006e-04
Loss = 4.9405e-02, PNorm = 68.6439, GNorm = 0.4912, lr_0 = 1.9992e-04
Loss = 4.5992e-02, PNorm = 68.6471, GNorm = 0.7154, lr_0 = 1.9979e-04
Loss = 4.4126e-02, PNorm = 68.6518, GNorm = 0.5038, lr_0 = 1.9965e-04
Loss = 5.3107e-02, PNorm = 68.6545, GNorm = 0.5261, lr_0 = 1.9951e-04
Loss = 4.2803e-02, PNorm = 68.6588, GNorm = 0.5602, lr_0 = 1.9938e-04
Loss = 4.9829e-02, PNorm = 68.6638, GNorm = 0.6361, lr_0 = 1.9924e-04
Loss = 4.2427e-02, PNorm = 68.6661, GNorm = 0.5163, lr_0 = 1.9910e-04
Loss = 4.6973e-02, PNorm = 68.6693, GNorm = 0.8741, lr_0 = 1.9897e-04
Loss = 5.1867e-02, PNorm = 68.6730, GNorm = 0.9539, lr_0 = 1.9883e-04
Loss = 3.9935e-02, PNorm = 68.6753, GNorm = 0.5062, lr_0 = 1.9869e-04
Loss = 4.8574e-02, PNorm = 68.6795, GNorm = 1.0551, lr_0 = 1.9856e-04
Loss = 4.2928e-02, PNorm = 68.6836, GNorm = 0.4880, lr_0 = 1.9842e-04
Loss = 4.7105e-02, PNorm = 68.6865, GNorm = 0.6586, lr_0 = 1.9829e-04
Loss = 4.8263e-02, PNorm = 68.6895, GNorm = 0.5437, lr_0 = 1.9815e-04
Loss = 5.4952e-02, PNorm = 68.6950, GNorm = 0.5553, lr_0 = 1.9801e-04
Loss = 4.4908e-02, PNorm = 68.6988, GNorm = 0.4607, lr_0 = 1.9788e-04
Loss = 4.6754e-02, PNorm = 68.7024, GNorm = 0.4969, lr_0 = 1.9774e-04
Loss = 4.6615e-02, PNorm = 68.7057, GNorm = 0.5316, lr_0 = 1.9761e-04
Loss = 4.1299e-02, PNorm = 68.7085, GNorm = 0.5571, lr_0 = 1.9747e-04
Loss = 4.3531e-02, PNorm = 68.7101, GNorm = 0.4786, lr_0 = 1.9734e-04
Loss = 3.8057e-02, PNorm = 68.7124, GNorm = 0.4418, lr_0 = 1.9720e-04
Loss = 4.7783e-02, PNorm = 68.7156, GNorm = 0.8935, lr_0 = 1.9707e-04
Loss = 5.0878e-02, PNorm = 68.7195, GNorm = 0.4568, lr_0 = 1.9693e-04
Loss = 4.3165e-02, PNorm = 68.7230, GNorm = 0.4949, lr_0 = 1.9680e-04
Loss = 4.7121e-02, PNorm = 68.7255, GNorm = 0.5826, lr_0 = 1.9666e-04
Loss = 5.1352e-02, PNorm = 68.7282, GNorm = 0.4760, lr_0 = 1.9653e-04
Loss = 4.6622e-02, PNorm = 68.7308, GNorm = 0.5775, lr_0 = 1.9639e-04
Loss = 5.1939e-02, PNorm = 68.7347, GNorm = 0.6680, lr_0 = 1.9626e-04
Loss = 4.9579e-02, PNorm = 68.7396, GNorm = 0.4178, lr_0 = 1.9612e-04
Loss = 4.7856e-02, PNorm = 68.7438, GNorm = 0.4514, lr_0 = 1.9599e-04
Loss = 4.9616e-02, PNorm = 68.7466, GNorm = 0.9971, lr_0 = 1.9585e-04
Loss = 4.1040e-02, PNorm = 68.7483, GNorm = 0.4370, lr_0 = 1.9572e-04
Loss = 4.6249e-02, PNorm = 68.7514, GNorm = 0.4119, lr_0 = 1.9559e-04
Loss = 4.6659e-02, PNorm = 68.7528, GNorm = 0.3797, lr_0 = 1.9545e-04
Loss = 4.7601e-02, PNorm = 68.7539, GNorm = 0.7611, lr_0 = 1.9532e-04
Loss = 4.3146e-02, PNorm = 68.7548, GNorm = 0.4487, lr_0 = 1.9518e-04
Loss = 3.9876e-02, PNorm = 68.7578, GNorm = 0.4536, lr_0 = 1.9505e-04
Loss = 4.4970e-02, PNorm = 68.7601, GNorm = 0.5674, lr_0 = 1.9492e-04
Loss = 4.5081e-02, PNorm = 68.7618, GNorm = 0.4509, lr_0 = 1.9478e-04
Loss = 4.6138e-02, PNorm = 68.7650, GNorm = 0.8421, lr_0 = 1.9465e-04
Loss = 4.4779e-02, PNorm = 68.7677, GNorm = 0.8259, lr_0 = 1.9452e-04
Loss = 4.7012e-02, PNorm = 68.7713, GNorm = 0.5277, lr_0 = 1.9438e-04
Loss = 5.3907e-02, PNorm = 68.7750, GNorm = 0.6880, lr_0 = 1.9425e-04
Loss = 5.8334e-02, PNorm = 68.7790, GNorm = 0.4958, lr_0 = 1.9412e-04
Loss = 4.6607e-02, PNorm = 68.7813, GNorm = 0.4824, lr_0 = 1.9398e-04
Loss = 5.3399e-02, PNorm = 68.7862, GNorm = 0.6271, lr_0 = 1.9385e-04
Loss = 4.4577e-02, PNorm = 68.7899, GNorm = 0.5791, lr_0 = 1.9372e-04
Loss = 4.1173e-02, PNorm = 68.7917, GNorm = 0.4596, lr_0 = 1.9359e-04
Loss = 4.8161e-02, PNorm = 68.7946, GNorm = 0.5842, lr_0 = 1.9345e-04
Loss = 5.3039e-02, PNorm = 68.7978, GNorm = 0.4683, lr_0 = 1.9332e-04
Loss = 4.9279e-02, PNorm = 68.8005, GNorm = 0.6607, lr_0 = 1.9319e-04
Loss = 4.8559e-02, PNorm = 68.8032, GNorm = 0.5451, lr_0 = 1.9306e-04
Validation mae = 0.390543
Epoch 22
Loss = 3.9879e-02, PNorm = 68.8050, GNorm = 0.6820, lr_0 = 1.9292e-04
Loss = 4.9110e-02, PNorm = 68.8097, GNorm = 0.9380, lr_0 = 1.9279e-04
Loss = 4.4040e-02, PNorm = 68.8138, GNorm = 0.5567, lr_0 = 1.9266e-04
Loss = 4.0242e-02, PNorm = 68.8171, GNorm = 0.4097, lr_0 = 1.9253e-04
Loss = 4.2523e-02, PNorm = 68.8197, GNorm = 0.5335, lr_0 = 1.9240e-04
Loss = 3.8026e-02, PNorm = 68.8209, GNorm = 0.3605, lr_0 = 1.9226e-04
Loss = 5.0543e-02, PNorm = 68.8239, GNorm = 0.4263, lr_0 = 1.9213e-04
Loss = 3.6176e-02, PNorm = 68.8276, GNorm = 0.4982, lr_0 = 1.9200e-04
Loss = 3.8683e-02, PNorm = 68.8306, GNorm = 0.3641, lr_0 = 1.9187e-04
Loss = 3.7626e-02, PNorm = 68.8339, GNorm = 0.4596, lr_0 = 1.9174e-04
Loss = 4.2784e-02, PNorm = 68.8371, GNorm = 0.4436, lr_0 = 1.9161e-04
Loss = 4.0796e-02, PNorm = 68.8396, GNorm = 0.4388, lr_0 = 1.9148e-04
Loss = 3.9403e-02, PNorm = 68.8426, GNorm = 0.3947, lr_0 = 1.9134e-04
Loss = 4.4069e-02, PNorm = 68.8461, GNorm = 0.7125, lr_0 = 1.9121e-04
Loss = 4.1576e-02, PNorm = 68.8505, GNorm = 0.8297, lr_0 = 1.9108e-04
Loss = 4.0223e-02, PNorm = 68.8538, GNorm = 0.8489, lr_0 = 1.9095e-04
Loss = 3.7581e-02, PNorm = 68.8564, GNorm = 0.5219, lr_0 = 1.9082e-04
Loss = 3.9827e-02, PNorm = 68.8607, GNorm = 0.3740, lr_0 = 1.9069e-04
Loss = 4.8835e-02, PNorm = 68.8652, GNorm = 0.4869, lr_0 = 1.9056e-04
Loss = 4.2828e-02, PNorm = 68.8708, GNorm = 0.4683, lr_0 = 1.9043e-04
Loss = 3.4520e-02, PNorm = 68.8763, GNorm = 0.5210, lr_0 = 1.9030e-04
Loss = 4.5167e-02, PNorm = 68.8801, GNorm = 0.4145, lr_0 = 1.9017e-04
Loss = 3.9150e-02, PNorm = 68.8825, GNorm = 0.5526, lr_0 = 1.9004e-04
Loss = 4.2497e-02, PNorm = 68.8846, GNorm = 0.7019, lr_0 = 1.8991e-04
Loss = 4.6567e-02, PNorm = 68.8881, GNorm = 0.4783, lr_0 = 1.8978e-04
Loss = 3.6018e-02, PNorm = 68.8913, GNorm = 0.4638, lr_0 = 1.8965e-04
Loss = 4.5766e-02, PNorm = 68.8954, GNorm = 0.5844, lr_0 = 1.8952e-04
Loss = 4.3446e-02, PNorm = 68.8989, GNorm = 0.5282, lr_0 = 1.8939e-04
Loss = 3.8395e-02, PNorm = 68.8995, GNorm = 0.5176, lr_0 = 1.8926e-04
Loss = 3.9466e-02, PNorm = 68.8999, GNorm = 0.4668, lr_0 = 1.8913e-04
Loss = 4.3224e-02, PNorm = 68.9014, GNorm = 0.5060, lr_0 = 1.8900e-04
Loss = 4.9734e-02, PNorm = 68.9057, GNorm = 0.6175, lr_0 = 1.8887e-04
Loss = 4.7107e-02, PNorm = 68.9088, GNorm = 0.4299, lr_0 = 1.8874e-04
Loss = 4.9415e-02, PNorm = 68.9104, GNorm = 0.6008, lr_0 = 1.8861e-04
Loss = 3.9625e-02, PNorm = 68.9142, GNorm = 0.4380, lr_0 = 1.8848e-04
Loss = 4.1410e-02, PNorm = 68.9166, GNorm = 0.5711, lr_0 = 1.8835e-04
Loss = 4.3301e-02, PNorm = 68.9199, GNorm = 0.5681, lr_0 = 1.8822e-04
Loss = 4.0322e-02, PNorm = 68.9223, GNorm = 0.4925, lr_0 = 1.8809e-04
Loss = 3.5377e-02, PNorm = 68.9255, GNorm = 0.2872, lr_0 = 1.8797e-04
Loss = 4.2199e-02, PNorm = 68.9290, GNorm = 0.3831, lr_0 = 1.8784e-04
Loss = 4.5039e-02, PNorm = 68.9321, GNorm = 0.4760, lr_0 = 1.8771e-04
Loss = 3.9789e-02, PNorm = 68.9351, GNorm = 0.4412, lr_0 = 1.8758e-04
Loss = 4.1026e-02, PNorm = 68.9377, GNorm = 0.3659, lr_0 = 1.8745e-04
Loss = 4.5191e-02, PNorm = 68.9416, GNorm = 0.4581, lr_0 = 1.8732e-04
Loss = 4.4265e-02, PNorm = 68.9451, GNorm = 0.3979, lr_0 = 1.8719e-04
Loss = 4.2153e-02, PNorm = 68.9483, GNorm = 0.5165, lr_0 = 1.8707e-04
Loss = 5.0083e-02, PNorm = 68.9527, GNorm = 0.6100, lr_0 = 1.8694e-04
Loss = 4.4249e-02, PNorm = 68.9565, GNorm = 0.5663, lr_0 = 1.8681e-04
Loss = 4.3623e-02, PNorm = 68.9590, GNorm = 0.5123, lr_0 = 1.8668e-04
Loss = 3.7641e-02, PNorm = 68.9612, GNorm = 0.7459, lr_0 = 1.8655e-04
Loss = 3.6952e-02, PNorm = 68.9631, GNorm = 0.4518, lr_0 = 1.8643e-04
Loss = 4.2768e-02, PNorm = 68.9654, GNorm = 0.4241, lr_0 = 1.8630e-04
Loss = 4.1233e-02, PNorm = 68.9684, GNorm = 0.3807, lr_0 = 1.8617e-04
Loss = 4.0996e-02, PNorm = 68.9701, GNorm = 0.3461, lr_0 = 1.8604e-04
Loss = 3.9793e-02, PNorm = 68.9739, GNorm = 0.5845, lr_0 = 1.8592e-04
Loss = 4.0622e-02, PNorm = 68.9764, GNorm = 0.4066, lr_0 = 1.8579e-04
Loss = 4.3461e-02, PNorm = 68.9787, GNorm = 0.5598, lr_0 = 1.8566e-04
Loss = 4.5480e-02, PNorm = 68.9797, GNorm = 0.4919, lr_0 = 1.8553e-04
Loss = 4.3035e-02, PNorm = 68.9819, GNorm = 0.4194, lr_0 = 1.8541e-04
Loss = 3.8876e-02, PNorm = 68.9845, GNorm = 0.4700, lr_0 = 1.8528e-04
Loss = 4.1446e-02, PNorm = 68.9878, GNorm = 0.5394, lr_0 = 1.8515e-04
Loss = 3.8456e-02, PNorm = 68.9906, GNorm = 0.4083, lr_0 = 1.8503e-04
Loss = 4.6380e-02, PNorm = 68.9918, GNorm = 0.6803, lr_0 = 1.8490e-04
Loss = 4.6036e-02, PNorm = 68.9940, GNorm = 0.5317, lr_0 = 1.8477e-04
Loss = 4.0110e-02, PNorm = 68.9966, GNorm = 0.4972, lr_0 = 1.8465e-04
Loss = 4.9342e-02, PNorm = 68.9984, GNorm = 0.4263, lr_0 = 1.8452e-04
Loss = 3.6446e-02, PNorm = 69.0006, GNorm = 0.7515, lr_0 = 1.8439e-04
Loss = 4.2069e-02, PNorm = 69.0042, GNorm = 0.5912, lr_0 = 1.8427e-04
Loss = 4.1917e-02, PNorm = 69.0073, GNorm = 0.6390, lr_0 = 1.8414e-04
Loss = 4.9804e-02, PNorm = 69.0113, GNorm = 0.7690, lr_0 = 1.8401e-04
Loss = 4.1485e-02, PNorm = 69.0144, GNorm = 0.7031, lr_0 = 1.8389e-04
Loss = 4.4358e-02, PNorm = 69.0165, GNorm = 0.8209, lr_0 = 1.8376e-04
Loss = 4.8561e-02, PNorm = 69.0189, GNorm = 0.4093, lr_0 = 1.8364e-04
Loss = 5.7157e-02, PNorm = 69.0230, GNorm = 0.7543, lr_0 = 1.8351e-04
Loss = 4.5258e-02, PNorm = 69.0276, GNorm = 0.7375, lr_0 = 1.8338e-04
Loss = 4.6479e-02, PNorm = 69.0332, GNorm = 0.3674, lr_0 = 1.8326e-04
Loss = 4.6981e-02, PNorm = 69.0331, GNorm = 0.4123, lr_0 = 1.8313e-04
Loss = 4.6396e-02, PNorm = 69.0345, GNorm = 0.4730, lr_0 = 1.8301e-04
Loss = 4.5101e-02, PNorm = 69.0376, GNorm = 0.5493, lr_0 = 1.8288e-04
Loss = 4.5916e-02, PNorm = 69.0407, GNorm = 0.9801, lr_0 = 1.8276e-04
Loss = 4.2286e-02, PNorm = 69.0418, GNorm = 0.4702, lr_0 = 1.8263e-04
Loss = 4.0331e-02, PNorm = 69.0436, GNorm = 0.6251, lr_0 = 1.8251e-04
Loss = 3.8477e-02, PNorm = 69.0467, GNorm = 0.8081, lr_0 = 1.8238e-04
Loss = 4.1374e-02, PNorm = 69.0500, GNorm = 0.3820, lr_0 = 1.8226e-04
Loss = 4.4995e-02, PNorm = 69.0525, GNorm = 0.4376, lr_0 = 1.8213e-04
Loss = 4.3749e-02, PNorm = 69.0562, GNorm = 0.3985, lr_0 = 1.8201e-04
Loss = 4.4898e-02, PNorm = 69.0588, GNorm = 0.5652, lr_0 = 1.8188e-04
Loss = 4.2476e-02, PNorm = 69.0614, GNorm = 0.4323, lr_0 = 1.8176e-04
Loss = 4.0369e-02, PNorm = 69.0630, GNorm = 0.7274, lr_0 = 1.8163e-04
Loss = 4.0720e-02, PNorm = 69.0646, GNorm = 0.4398, lr_0 = 1.8151e-04
Loss = 3.9977e-02, PNorm = 69.0664, GNorm = 0.6665, lr_0 = 1.8138e-04
Loss = 4.8320e-02, PNorm = 69.0681, GNorm = 0.6228, lr_0 = 1.8126e-04
Loss = 3.6589e-02, PNorm = 69.0697, GNorm = 0.4556, lr_0 = 1.8114e-04
Loss = 3.7281e-02, PNorm = 69.0720, GNorm = 0.4330, lr_0 = 1.8101e-04
Loss = 4.3817e-02, PNorm = 69.0758, GNorm = 0.6460, lr_0 = 1.8089e-04
Loss = 4.7011e-02, PNorm = 69.0793, GNorm = 0.5247, lr_0 = 1.8076e-04
Loss = 4.5367e-02, PNorm = 69.0826, GNorm = 0.6712, lr_0 = 1.8064e-04
Loss = 5.0417e-02, PNorm = 69.0858, GNorm = 0.5679, lr_0 = 1.8052e-04
Loss = 4.2051e-02, PNorm = 69.0872, GNorm = 0.5614, lr_0 = 1.8039e-04
Loss = 4.1877e-02, PNorm = 69.0896, GNorm = 0.5334, lr_0 = 1.8027e-04
Loss = 4.3281e-02, PNorm = 69.0916, GNorm = 0.7044, lr_0 = 1.8015e-04
Loss = 3.8254e-02, PNorm = 69.0944, GNorm = 0.3907, lr_0 = 1.8002e-04
Loss = 4.8634e-02, PNorm = 69.0960, GNorm = 0.5827, lr_0 = 1.7990e-04
Loss = 4.0593e-02, PNorm = 69.0978, GNorm = 0.5881, lr_0 = 1.7978e-04
Loss = 4.2859e-02, PNorm = 69.0996, GNorm = 0.3949, lr_0 = 1.7965e-04
Loss = 4.2094e-02, PNorm = 69.1012, GNorm = 0.7087, lr_0 = 1.7953e-04
Loss = 5.0505e-02, PNorm = 69.1051, GNorm = 0.7077, lr_0 = 1.7941e-04
Loss = 4.3896e-02, PNorm = 69.1082, GNorm = 0.6661, lr_0 = 1.7928e-04
Loss = 3.5945e-02, PNorm = 69.1102, GNorm = 0.5735, lr_0 = 1.7916e-04
Loss = 4.7004e-02, PNorm = 69.1133, GNorm = 0.6078, lr_0 = 1.7904e-04
Loss = 4.8772e-02, PNorm = 69.1168, GNorm = 0.5269, lr_0 = 1.7892e-04
Loss = 3.5752e-02, PNorm = 69.1196, GNorm = 0.6039, lr_0 = 1.7879e-04
Loss = 4.6828e-02, PNorm = 69.1228, GNorm = 0.4614, lr_0 = 1.7867e-04
Loss = 4.5043e-02, PNorm = 69.1274, GNorm = 0.5581, lr_0 = 1.7855e-04
Loss = 4.4649e-02, PNorm = 69.1301, GNorm = 0.4160, lr_0 = 1.7843e-04
Loss = 4.4706e-02, PNorm = 69.1313, GNorm = 0.4541, lr_0 = 1.7830e-04
Loss = 5.0834e-02, PNorm = 69.1337, GNorm = 0.8021, lr_0 = 1.7818e-04
Loss = 4.4330e-02, PNorm = 69.1357, GNorm = 0.5438, lr_0 = 1.7806e-04
Loss = 4.7576e-02, PNorm = 69.1391, GNorm = 0.8062, lr_0 = 1.7794e-04
Loss = 4.9298e-02, PNorm = 69.1422, GNorm = 0.4189, lr_0 = 1.7782e-04
Validation mae = 0.394658
Epoch 23
Loss = 3.5498e-02, PNorm = 69.1453, GNorm = 0.4581, lr_0 = 1.7769e-04
Loss = 4.3886e-02, PNorm = 69.1488, GNorm = 0.7659, lr_0 = 1.7757e-04
Loss = 3.8134e-02, PNorm = 69.1521, GNorm = 0.6371, lr_0 = 1.7745e-04
Loss = 3.7755e-02, PNorm = 69.1566, GNorm = 0.4817, lr_0 = 1.7733e-04
Loss = 4.0800e-02, PNorm = 69.1598, GNorm = 0.5286, lr_0 = 1.7721e-04
Loss = 4.3372e-02, PNorm = 69.1625, GNorm = 0.5215, lr_0 = 1.7709e-04
Loss = 3.9380e-02, PNorm = 69.1638, GNorm = 0.5189, lr_0 = 1.7696e-04
Loss = 4.0047e-02, PNorm = 69.1654, GNorm = 0.6496, lr_0 = 1.7684e-04
Loss = 3.7965e-02, PNorm = 69.1683, GNorm = 0.5062, lr_0 = 1.7672e-04
Loss = 3.1902e-02, PNorm = 69.1699, GNorm = 0.4646, lr_0 = 1.7660e-04
Loss = 3.7223e-02, PNorm = 69.1723, GNorm = 0.5201, lr_0 = 1.7648e-04
Loss = 3.5488e-02, PNorm = 69.1726, GNorm = 0.4540, lr_0 = 1.7636e-04
Loss = 3.3840e-02, PNorm = 69.1745, GNorm = 0.5709, lr_0 = 1.7624e-04
Loss = 3.9200e-02, PNorm = 69.1780, GNorm = 0.4324, lr_0 = 1.7612e-04
Loss = 4.0547e-02, PNorm = 69.1816, GNorm = 0.6691, lr_0 = 1.7600e-04
Loss = 4.0475e-02, PNorm = 69.1852, GNorm = 0.5048, lr_0 = 1.7588e-04
Loss = 4.0476e-02, PNorm = 69.1894, GNorm = 0.3655, lr_0 = 1.7576e-04
Loss = 3.8917e-02, PNorm = 69.1923, GNorm = 0.6065, lr_0 = 1.7564e-04
Loss = 3.7408e-02, PNorm = 69.1949, GNorm = 0.5097, lr_0 = 1.7552e-04
Loss = 4.4507e-02, PNorm = 69.1987, GNorm = 0.5728, lr_0 = 1.7540e-04
Loss = 3.5061e-02, PNorm = 69.2011, GNorm = 0.5803, lr_0 = 1.7528e-04
Loss = 4.3265e-02, PNorm = 69.2031, GNorm = 0.7640, lr_0 = 1.7516e-04
Loss = 4.1131e-02, PNorm = 69.2062, GNorm = 0.4442, lr_0 = 1.7504e-04
Loss = 3.5513e-02, PNorm = 69.2094, GNorm = 0.4072, lr_0 = 1.7492e-04
Loss = 4.3039e-02, PNorm = 69.2132, GNorm = 0.5085, lr_0 = 1.7480e-04
Loss = 4.0923e-02, PNorm = 69.2153, GNorm = 0.4954, lr_0 = 1.7468e-04
Loss = 3.8982e-02, PNorm = 69.2186, GNorm = 0.6339, lr_0 = 1.7456e-04
Loss = 4.3825e-02, PNorm = 69.2219, GNorm = 0.5929, lr_0 = 1.7444e-04
Loss = 3.5491e-02, PNorm = 69.2239, GNorm = 0.4336, lr_0 = 1.7432e-04
Loss = 3.7725e-02, PNorm = 69.2263, GNorm = 0.5010, lr_0 = 1.7420e-04
Loss = 3.8641e-02, PNorm = 69.2290, GNorm = 0.6998, lr_0 = 1.7408e-04
Loss = 4.0877e-02, PNorm = 69.2314, GNorm = 0.5090, lr_0 = 1.7396e-04
Loss = 4.0787e-02, PNorm = 69.2339, GNorm = 0.5335, lr_0 = 1.7384e-04
Loss = 4.1234e-02, PNorm = 69.2360, GNorm = 0.4461, lr_0 = 1.7372e-04
Loss = 4.0894e-02, PNorm = 69.2379, GNorm = 0.6443, lr_0 = 1.7360e-04
Loss = 3.8882e-02, PNorm = 69.2389, GNorm = 0.4104, lr_0 = 1.7348e-04
Loss = 4.0886e-02, PNorm = 69.2407, GNorm = 0.8479, lr_0 = 1.7336e-04
Loss = 4.0890e-02, PNorm = 69.2434, GNorm = 0.8297, lr_0 = 1.7325e-04
Loss = 4.0954e-02, PNorm = 69.2457, GNorm = 0.5353, lr_0 = 1.7313e-04
Loss = 4.0628e-02, PNorm = 69.2478, GNorm = 0.4617, lr_0 = 1.7301e-04
Loss = 4.0634e-02, PNorm = 69.2511, GNorm = 0.4464, lr_0 = 1.7289e-04
Loss = 4.3800e-02, PNorm = 69.2547, GNorm = 0.6012, lr_0 = 1.7277e-04
Loss = 3.8905e-02, PNorm = 69.2560, GNorm = 0.4817, lr_0 = 1.7265e-04
Loss = 3.4940e-02, PNorm = 69.2574, GNorm = 0.4447, lr_0 = 1.7253e-04
Loss = 4.8824e-02, PNorm = 69.2615, GNorm = 0.4877, lr_0 = 1.7242e-04
Loss = 3.5109e-02, PNorm = 69.2652, GNorm = 0.5061, lr_0 = 1.7230e-04
Loss = 3.3550e-02, PNorm = 69.2669, GNorm = 0.5533, lr_0 = 1.7218e-04
Loss = 3.1946e-02, PNorm = 69.2694, GNorm = 0.5071, lr_0 = 1.7206e-04
Loss = 4.0974e-02, PNorm = 69.2717, GNorm = 0.6731, lr_0 = 1.7194e-04
Loss = 4.6843e-02, PNorm = 69.2722, GNorm = 0.4183, lr_0 = 1.7183e-04
Loss = 4.4928e-02, PNorm = 69.2749, GNorm = 0.5020, lr_0 = 1.7171e-04
Loss = 4.2312e-02, PNorm = 69.2773, GNorm = 0.5757, lr_0 = 1.7159e-04
Loss = 3.7703e-02, PNorm = 69.2800, GNorm = 0.5063, lr_0 = 1.7147e-04
Loss = 4.3823e-02, PNorm = 69.2827, GNorm = 0.4869, lr_0 = 1.7136e-04
Loss = 4.1851e-02, PNorm = 69.2867, GNorm = 0.4049, lr_0 = 1.7124e-04
Loss = 3.7986e-02, PNorm = 69.2892, GNorm = 0.4645, lr_0 = 1.7112e-04
Loss = 4.0577e-02, PNorm = 69.2916, GNorm = 0.6326, lr_0 = 1.7100e-04
Loss = 3.9461e-02, PNorm = 69.2945, GNorm = 0.4373, lr_0 = 1.7089e-04
Loss = 3.6870e-02, PNorm = 69.2974, GNorm = 0.4585, lr_0 = 1.7077e-04
Loss = 4.3378e-02, PNorm = 69.3011, GNorm = 0.6910, lr_0 = 1.7065e-04
Loss = 3.9963e-02, PNorm = 69.3033, GNorm = 0.2551, lr_0 = 1.7054e-04
Loss = 4.2912e-02, PNorm = 69.3050, GNorm = 0.5461, lr_0 = 1.7042e-04
Loss = 4.6506e-02, PNorm = 69.3066, GNorm = 0.5329, lr_0 = 1.7030e-04
Loss = 3.8058e-02, PNorm = 69.3092, GNorm = 0.5679, lr_0 = 1.7019e-04
Loss = 4.3480e-02, PNorm = 69.3136, GNorm = 0.5267, lr_0 = 1.7007e-04
Loss = 4.2791e-02, PNorm = 69.3155, GNorm = 0.6000, lr_0 = 1.6995e-04
Loss = 3.8347e-02, PNorm = 69.3184, GNorm = 0.5836, lr_0 = 1.6984e-04
Loss = 3.5824e-02, PNorm = 69.3215, GNorm = 0.4913, lr_0 = 1.6972e-04
Loss = 3.3366e-02, PNorm = 69.3230, GNorm = 0.4227, lr_0 = 1.6960e-04
Loss = 3.7380e-02, PNorm = 69.3251, GNorm = 0.4345, lr_0 = 1.6949e-04
Loss = 4.9140e-02, PNorm = 69.3293, GNorm = 0.6411, lr_0 = 1.6937e-04
Loss = 4.4559e-02, PNorm = 69.3320, GNorm = 0.4532, lr_0 = 1.6926e-04
Loss = 4.4000e-02, PNorm = 69.3342, GNorm = 0.7104, lr_0 = 1.6914e-04
Loss = 3.6383e-02, PNorm = 69.3373, GNorm = 0.3447, lr_0 = 1.6902e-04
Loss = 3.8580e-02, PNorm = 69.3389, GNorm = 0.4556, lr_0 = 1.6891e-04
Loss = 4.2329e-02, PNorm = 69.3416, GNorm = 0.3769, lr_0 = 1.6879e-04
Loss = 4.4827e-02, PNorm = 69.3440, GNorm = 0.6918, lr_0 = 1.6868e-04
Loss = 3.9140e-02, PNorm = 69.3456, GNorm = 0.3955, lr_0 = 1.6856e-04
Loss = 3.9400e-02, PNorm = 69.3479, GNorm = 0.4697, lr_0 = 1.6845e-04
Loss = 3.4831e-02, PNorm = 69.3490, GNorm = 0.4297, lr_0 = 1.6833e-04
Loss = 4.3919e-02, PNorm = 69.3503, GNorm = 0.6220, lr_0 = 1.6821e-04
Loss = 4.3590e-02, PNorm = 69.3528, GNorm = 0.7189, lr_0 = 1.6810e-04
Loss = 4.3814e-02, PNorm = 69.3543, GNorm = 0.4697, lr_0 = 1.6798e-04
Loss = 4.0524e-02, PNorm = 69.3570, GNorm = 0.4783, lr_0 = 1.6787e-04
Loss = 3.7459e-02, PNorm = 69.3590, GNorm = 0.6241, lr_0 = 1.6775e-04
Loss = 3.8015e-02, PNorm = 69.3618, GNorm = 0.5160, lr_0 = 1.6764e-04
Loss = 3.9770e-02, PNorm = 69.3634, GNorm = 0.6586, lr_0 = 1.6752e-04
Loss = 4.2487e-02, PNorm = 69.3670, GNorm = 0.5020, lr_0 = 1.6741e-04
Loss = 4.4268e-02, PNorm = 69.3706, GNorm = 0.7004, lr_0 = 1.6729e-04
Loss = 5.1102e-02, PNorm = 69.3726, GNorm = 0.6295, lr_0 = 1.6718e-04
Loss = 4.3818e-02, PNorm = 69.3754, GNorm = 0.5527, lr_0 = 1.6707e-04
Loss = 4.5616e-02, PNorm = 69.3776, GNorm = 0.5164, lr_0 = 1.6695e-04
Loss = 4.2977e-02, PNorm = 69.3810, GNorm = 0.5270, lr_0 = 1.6684e-04
Loss = 4.7948e-02, PNorm = 69.3839, GNorm = 0.9614, lr_0 = 1.6672e-04
Loss = 3.6421e-02, PNorm = 69.3856, GNorm = 1.0797, lr_0 = 1.6661e-04
Loss = 3.5784e-02, PNorm = 69.3881, GNorm = 0.5053, lr_0 = 1.6649e-04
Loss = 4.6331e-02, PNorm = 69.3911, GNorm = 0.5464, lr_0 = 1.6638e-04
Loss = 3.5446e-02, PNorm = 69.3933, GNorm = 0.5078, lr_0 = 1.6627e-04
Loss = 3.7504e-02, PNorm = 69.3948, GNorm = 0.4244, lr_0 = 1.6615e-04
Loss = 3.6013e-02, PNorm = 69.3973, GNorm = 0.4634, lr_0 = 1.6604e-04
Loss = 3.9484e-02, PNorm = 69.3989, GNorm = 0.5432, lr_0 = 1.6592e-04
Loss = 4.3330e-02, PNorm = 69.4010, GNorm = 0.4647, lr_0 = 1.6581e-04
Loss = 4.3928e-02, PNorm = 69.4043, GNorm = 0.5239, lr_0 = 1.6570e-04
Loss = 4.0361e-02, PNorm = 69.4069, GNorm = 0.6501, lr_0 = 1.6558e-04
Loss = 4.4414e-02, PNorm = 69.4110, GNorm = 0.5039, lr_0 = 1.6547e-04
Loss = 5.0184e-02, PNorm = 69.4138, GNorm = 0.5517, lr_0 = 1.6536e-04
Loss = 4.9844e-02, PNorm = 69.4167, GNorm = 0.5106, lr_0 = 1.6524e-04
Loss = 4.6725e-02, PNorm = 69.4204, GNorm = 0.5312, lr_0 = 1.6513e-04
Loss = 4.1128e-02, PNorm = 69.4241, GNorm = 0.5494, lr_0 = 1.6502e-04
Loss = 4.2616e-02, PNorm = 69.4263, GNorm = 0.5009, lr_0 = 1.6490e-04
Loss = 4.3733e-02, PNorm = 69.4284, GNorm = 0.8858, lr_0 = 1.6479e-04
Loss = 3.6831e-02, PNorm = 69.4299, GNorm = 0.4985, lr_0 = 1.6468e-04
Loss = 4.1969e-02, PNorm = 69.4328, GNorm = 0.5260, lr_0 = 1.6457e-04
Loss = 4.2577e-02, PNorm = 69.4352, GNorm = 0.3303, lr_0 = 1.6445e-04
Loss = 3.8473e-02, PNorm = 69.4382, GNorm = 0.4889, lr_0 = 1.6434e-04
Loss = 4.5169e-02, PNorm = 69.4413, GNorm = 0.5041, lr_0 = 1.6423e-04
Loss = 3.9102e-02, PNorm = 69.4441, GNorm = 0.4802, lr_0 = 1.6412e-04
Loss = 4.5701e-02, PNorm = 69.4466, GNorm = 0.5839, lr_0 = 1.6400e-04
Loss = 4.0572e-02, PNorm = 69.4498, GNorm = 0.5187, lr_0 = 1.6389e-04
Loss = 4.7294e-02, PNorm = 69.4515, GNorm = 0.4850, lr_0 = 1.6378e-04
Validation mae = 0.386895
Epoch 24
Loss = 3.5374e-02, PNorm = 69.4542, GNorm = 0.5712, lr_0 = 1.6367e-04
Loss = 4.1563e-02, PNorm = 69.4566, GNorm = 0.4591, lr_0 = 1.6355e-04
Loss = 3.6174e-02, PNorm = 69.4589, GNorm = 0.3876, lr_0 = 1.6344e-04
Loss = 3.4253e-02, PNorm = 69.4619, GNorm = 0.5029, lr_0 = 1.6333e-04
Loss = 3.5119e-02, PNorm = 69.4648, GNorm = 0.6896, lr_0 = 1.6322e-04
Loss = 3.8400e-02, PNorm = 69.4673, GNorm = 0.6042, lr_0 = 1.6311e-04
Loss = 3.8168e-02, PNorm = 69.4709, GNorm = 0.4343, lr_0 = 1.6299e-04
Loss = 3.8849e-02, PNorm = 69.4751, GNorm = 0.5994, lr_0 = 1.6288e-04
Loss = 4.0383e-02, PNorm = 69.4773, GNorm = 0.5214, lr_0 = 1.6277e-04
Loss = 3.2439e-02, PNorm = 69.4786, GNorm = 0.5379, lr_0 = 1.6266e-04
Loss = 3.4559e-02, PNorm = 69.4816, GNorm = 0.5309, lr_0 = 1.6255e-04
Loss = 3.4533e-02, PNorm = 69.4837, GNorm = 0.4440, lr_0 = 1.6244e-04
Loss = 3.3236e-02, PNorm = 69.4850, GNorm = 0.3810, lr_0 = 1.6233e-04
Loss = 3.6756e-02, PNorm = 69.4878, GNorm = 0.6165, lr_0 = 1.6221e-04
Loss = 4.2045e-02, PNorm = 69.4916, GNorm = 0.5723, lr_0 = 1.6210e-04
Loss = 3.4650e-02, PNorm = 69.4953, GNorm = 0.4781, lr_0 = 1.6199e-04
Loss = 3.4945e-02, PNorm = 69.4974, GNorm = 0.5565, lr_0 = 1.6188e-04
Loss = 4.2046e-02, PNorm = 69.4996, GNorm = 0.5911, lr_0 = 1.6177e-04
Loss = 3.9010e-02, PNorm = 69.5010, GNorm = 0.4419, lr_0 = 1.6166e-04
Loss = 4.4587e-02, PNorm = 69.5037, GNorm = 0.3749, lr_0 = 1.6155e-04
Loss = 3.5575e-02, PNorm = 69.5068, GNorm = 0.4489, lr_0 = 1.6144e-04
Loss = 3.3200e-02, PNorm = 69.5098, GNorm = 0.3483, lr_0 = 1.6133e-04
Loss = 3.9557e-02, PNorm = 69.5113, GNorm = 0.5108, lr_0 = 1.6122e-04
Loss = 3.4320e-02, PNorm = 69.5132, GNorm = 0.4508, lr_0 = 1.6111e-04
Loss = 4.5848e-02, PNorm = 69.5174, GNorm = 0.3994, lr_0 = 1.6100e-04
Loss = 3.9823e-02, PNorm = 69.5187, GNorm = 0.4276, lr_0 = 1.6089e-04
Loss = 4.1984e-02, PNorm = 69.5204, GNorm = 0.4685, lr_0 = 1.6078e-04
Loss = 3.8357e-02, PNorm = 69.5234, GNorm = 0.4413, lr_0 = 1.6067e-04
Loss = 3.5729e-02, PNorm = 69.5265, GNorm = 0.5208, lr_0 = 1.6056e-04
Loss = 3.4352e-02, PNorm = 69.5287, GNorm = 0.3876, lr_0 = 1.6045e-04
Loss = 3.4540e-02, PNorm = 69.5303, GNorm = 0.6042, lr_0 = 1.6034e-04
Loss = 3.4643e-02, PNorm = 69.5326, GNorm = 0.6812, lr_0 = 1.6023e-04
Loss = 3.0987e-02, PNorm = 69.5346, GNorm = 0.4509, lr_0 = 1.6012e-04
Loss = 3.4817e-02, PNorm = 69.5363, GNorm = 0.5032, lr_0 = 1.6001e-04
Loss = 3.2414e-02, PNorm = 69.5380, GNorm = 0.5428, lr_0 = 1.5990e-04
Loss = 4.0969e-02, PNorm = 69.5412, GNorm = 0.5443, lr_0 = 1.5979e-04
Loss = 4.0317e-02, PNorm = 69.5443, GNorm = 0.6249, lr_0 = 1.5968e-04
Loss = 3.8065e-02, PNorm = 69.5470, GNorm = 0.6343, lr_0 = 1.5957e-04
Loss = 3.8610e-02, PNorm = 69.5497, GNorm = 0.4115, lr_0 = 1.5946e-04
Loss = 4.0935e-02, PNorm = 69.5518, GNorm = 0.3976, lr_0 = 1.5935e-04
Loss = 4.2488e-02, PNorm = 69.5543, GNorm = 0.5029, lr_0 = 1.5924e-04
Loss = 3.7608e-02, PNorm = 69.5573, GNorm = 0.4496, lr_0 = 1.5913e-04
Loss = 3.5827e-02, PNorm = 69.5590, GNorm = 0.3708, lr_0 = 1.5902e-04
Loss = 4.2269e-02, PNorm = 69.5618, GNorm = 0.6055, lr_0 = 1.5891e-04
Loss = 4.0335e-02, PNorm = 69.5664, GNorm = 0.5627, lr_0 = 1.5880e-04
Loss = 3.8807e-02, PNorm = 69.5718, GNorm = 0.6612, lr_0 = 1.5870e-04
Loss = 4.1240e-02, PNorm = 69.5752, GNorm = 0.5652, lr_0 = 1.5859e-04
Loss = 3.1570e-02, PNorm = 69.5775, GNorm = 0.4154, lr_0 = 1.5848e-04
Loss = 3.5860e-02, PNorm = 69.5790, GNorm = 0.3962, lr_0 = 1.5837e-04
Loss = 3.7061e-02, PNorm = 69.5786, GNorm = 0.3589, lr_0 = 1.5826e-04
Loss = 3.6631e-02, PNorm = 69.5795, GNorm = 0.5141, lr_0 = 1.5815e-04
Loss = 4.1677e-02, PNorm = 69.5828, GNorm = 0.9097, lr_0 = 1.5804e-04
Loss = 3.7763e-02, PNorm = 69.5866, GNorm = 0.4784, lr_0 = 1.5794e-04
Loss = 3.6793e-02, PNorm = 69.5876, GNorm = 0.6056, lr_0 = 1.5783e-04
Loss = 3.6069e-02, PNorm = 69.5901, GNorm = 0.5597, lr_0 = 1.5772e-04
Loss = 4.3030e-02, PNorm = 69.5928, GNorm = 0.6118, lr_0 = 1.5761e-04
Loss = 3.4976e-02, PNorm = 69.5963, GNorm = 0.5210, lr_0 = 1.5750e-04
Loss = 4.6153e-02, PNorm = 69.5975, GNorm = 0.6341, lr_0 = 1.5740e-04
Loss = 3.5060e-02, PNorm = 69.5991, GNorm = 0.5470, lr_0 = 1.5729e-04
Loss = 3.7174e-02, PNorm = 69.6009, GNorm = 0.3784, lr_0 = 1.5718e-04
Loss = 4.6171e-02, PNorm = 69.6037, GNorm = 0.6370, lr_0 = 1.5707e-04
Loss = 3.8184e-02, PNorm = 69.6057, GNorm = 0.5097, lr_0 = 1.5697e-04
Loss = 3.7922e-02, PNorm = 69.6070, GNorm = 0.6312, lr_0 = 1.5686e-04
Loss = 4.3604e-02, PNorm = 69.6094, GNorm = 0.4777, lr_0 = 1.5675e-04
Loss = 3.8008e-02, PNorm = 69.6131, GNorm = 0.5104, lr_0 = 1.5664e-04
Loss = 4.1905e-02, PNorm = 69.6164, GNorm = 0.4877, lr_0 = 1.5654e-04
Loss = 4.5803e-02, PNorm = 69.6192, GNorm = 0.8315, lr_0 = 1.5643e-04
Loss = 3.6887e-02, PNorm = 69.6219, GNorm = 0.5964, lr_0 = 1.5632e-04
Loss = 3.2465e-02, PNorm = 69.6237, GNorm = 0.3527, lr_0 = 1.5621e-04
Loss = 4.0219e-02, PNorm = 69.6261, GNorm = 0.7741, lr_0 = 1.5611e-04
Loss = 3.4614e-02, PNorm = 69.6278, GNorm = 0.8017, lr_0 = 1.5600e-04
Loss = 3.6386e-02, PNorm = 69.6292, GNorm = 0.5019, lr_0 = 1.5589e-04
Loss = 4.3917e-02, PNorm = 69.6325, GNorm = 0.5182, lr_0 = 1.5579e-04
Loss = 4.2117e-02, PNorm = 69.6358, GNorm = 0.4247, lr_0 = 1.5568e-04
Loss = 3.2085e-02, PNorm = 69.6383, GNorm = 0.4845, lr_0 = 1.5557e-04
Loss = 4.1662e-02, PNorm = 69.6400, GNorm = 0.6457, lr_0 = 1.5547e-04
Loss = 4.6309e-02, PNorm = 69.6420, GNorm = 0.4502, lr_0 = 1.5536e-04
Loss = 3.7531e-02, PNorm = 69.6442, GNorm = 0.4850, lr_0 = 1.5525e-04
Loss = 3.2008e-02, PNorm = 69.6464, GNorm = 0.4138, lr_0 = 1.5515e-04
Loss = 3.7777e-02, PNorm = 69.6487, GNorm = 0.4048, lr_0 = 1.5504e-04
Loss = 4.0571e-02, PNorm = 69.6508, GNorm = 0.3911, lr_0 = 1.5493e-04
Loss = 4.0986e-02, PNorm = 69.6524, GNorm = 0.4464, lr_0 = 1.5483e-04
Loss = 4.1392e-02, PNorm = 69.6541, GNorm = 0.5120, lr_0 = 1.5472e-04
Loss = 3.3141e-02, PNorm = 69.6564, GNorm = 0.5294, lr_0 = 1.5462e-04
Loss = 3.8886e-02, PNorm = 69.6587, GNorm = 0.5226, lr_0 = 1.5451e-04
Loss = 3.8677e-02, PNorm = 69.6598, GNorm = 0.8305, lr_0 = 1.5440e-04
Loss = 3.7181e-02, PNorm = 69.6609, GNorm = 0.7535, lr_0 = 1.5430e-04
Loss = 4.3019e-02, PNorm = 69.6622, GNorm = 0.4742, lr_0 = 1.5419e-04
Loss = 3.9196e-02, PNorm = 69.6634, GNorm = 0.4743, lr_0 = 1.5409e-04
Loss = 3.9237e-02, PNorm = 69.6664, GNorm = 0.4720, lr_0 = 1.5398e-04
Loss = 3.9754e-02, PNorm = 69.6692, GNorm = 0.8739, lr_0 = 1.5388e-04
Loss = 3.9406e-02, PNorm = 69.6699, GNorm = 0.5780, lr_0 = 1.5377e-04
Loss = 3.5019e-02, PNorm = 69.6710, GNorm = 0.4451, lr_0 = 1.5367e-04
Loss = 4.5440e-02, PNorm = 69.6736, GNorm = 1.0138, lr_0 = 1.5356e-04
Loss = 4.1217e-02, PNorm = 69.6766, GNorm = 0.4843, lr_0 = 1.5346e-04
Loss = 4.8914e-02, PNorm = 69.6785, GNorm = 0.4454, lr_0 = 1.5335e-04
Loss = 4.0455e-02, PNorm = 69.6812, GNorm = 0.4099, lr_0 = 1.5325e-04
Loss = 3.6426e-02, PNorm = 69.6834, GNorm = 0.4757, lr_0 = 1.5314e-04
Loss = 3.7516e-02, PNorm = 69.6849, GNorm = 0.6297, lr_0 = 1.5304e-04
Loss = 3.7073e-02, PNorm = 69.6858, GNorm = 0.4339, lr_0 = 1.5293e-04
Loss = 4.3751e-02, PNorm = 69.6883, GNorm = 0.5983, lr_0 = 1.5283e-04
Loss = 3.9344e-02, PNorm = 69.6910, GNorm = 0.8144, lr_0 = 1.5272e-04
Loss = 4.0710e-02, PNorm = 69.6918, GNorm = 0.4837, lr_0 = 1.5262e-04
Loss = 4.1299e-02, PNorm = 69.6939, GNorm = 0.9223, lr_0 = 1.5251e-04
Loss = 4.1453e-02, PNorm = 69.6946, GNorm = 0.5475, lr_0 = 1.5241e-04
Loss = 3.8599e-02, PNorm = 69.6965, GNorm = 0.4303, lr_0 = 1.5230e-04
Loss = 4.3504e-02, PNorm = 69.6999, GNorm = 1.1693, lr_0 = 1.5220e-04
Loss = 4.1934e-02, PNorm = 69.7027, GNorm = 0.7699, lr_0 = 1.5209e-04
Loss = 3.6273e-02, PNorm = 69.7050, GNorm = 0.4478, lr_0 = 1.5199e-04
Loss = 4.7662e-02, PNorm = 69.7077, GNorm = 0.8072, lr_0 = 1.5189e-04
Loss = 3.9122e-02, PNorm = 69.7095, GNorm = 0.7007, lr_0 = 1.5178e-04
Loss = 4.1186e-02, PNorm = 69.7113, GNorm = 0.4410, lr_0 = 1.5168e-04
Loss = 4.3254e-02, PNorm = 69.7137, GNorm = 0.4662, lr_0 = 1.5157e-04
Loss = 3.7388e-02, PNorm = 69.7160, GNorm = 0.5322, lr_0 = 1.5147e-04
Loss = 4.1278e-02, PNorm = 69.7190, GNorm = 0.3660, lr_0 = 1.5137e-04
Loss = 3.4504e-02, PNorm = 69.7209, GNorm = 0.3715, lr_0 = 1.5126e-04
Loss = 3.8852e-02, PNorm = 69.7237, GNorm = 0.5500, lr_0 = 1.5116e-04
Loss = 3.8261e-02, PNorm = 69.7262, GNorm = 0.5536, lr_0 = 1.5106e-04
Loss = 3.2180e-02, PNorm = 69.7291, GNorm = 0.5484, lr_0 = 1.5095e-04
Loss = 4.4892e-02, PNorm = 69.7309, GNorm = 0.6971, lr_0 = 1.5085e-04
Validation mae = 0.394520
Epoch 25
Loss = 3.3533e-02, PNorm = 69.7322, GNorm = 0.4173, lr_0 = 1.5075e-04
Loss = 2.8482e-02, PNorm = 69.7346, GNorm = 0.4051, lr_0 = 1.5064e-04
Loss = 2.9529e-02, PNorm = 69.7375, GNorm = 0.4482, lr_0 = 1.5054e-04
Loss = 3.1666e-02, PNorm = 69.7386, GNorm = 0.4462, lr_0 = 1.5044e-04
Loss = 3.5684e-02, PNorm = 69.7401, GNorm = 0.5111, lr_0 = 1.5033e-04
Loss = 3.4808e-02, PNorm = 69.7415, GNorm = 0.4676, lr_0 = 1.5023e-04
Loss = 3.9425e-02, PNorm = 69.7434, GNorm = 0.5092, lr_0 = 1.5013e-04
Loss = 3.3883e-02, PNorm = 69.7461, GNorm = 0.6821, lr_0 = 1.5002e-04
Loss = 3.3822e-02, PNorm = 69.7487, GNorm = 0.5132, lr_0 = 1.4992e-04
Loss = 3.3355e-02, PNorm = 69.7506, GNorm = 0.4426, lr_0 = 1.4982e-04
Loss = 3.1854e-02, PNorm = 69.7526, GNorm = 0.4506, lr_0 = 1.4972e-04
Loss = 2.8181e-02, PNorm = 69.7551, GNorm = 0.4253, lr_0 = 1.4961e-04
Loss = 3.4176e-02, PNorm = 69.7570, GNorm = 0.4806, lr_0 = 1.4951e-04
Loss = 3.2654e-02, PNorm = 69.7591, GNorm = 0.5589, lr_0 = 1.4941e-04
Loss = 3.8744e-02, PNorm = 69.7622, GNorm = 0.4397, lr_0 = 1.4931e-04
Loss = 3.6990e-02, PNorm = 69.7637, GNorm = 0.5971, lr_0 = 1.4920e-04
Loss = 3.2680e-02, PNorm = 69.7656, GNorm = 0.6052, lr_0 = 1.4910e-04
Loss = 3.7694e-02, PNorm = 69.7691, GNorm = 0.4725, lr_0 = 1.4900e-04
Loss = 3.5664e-02, PNorm = 69.7708, GNorm = 0.4663, lr_0 = 1.4890e-04
Loss = 3.0466e-02, PNorm = 69.7727, GNorm = 0.4557, lr_0 = 1.4880e-04
Loss = 3.1373e-02, PNorm = 69.7755, GNorm = 0.3982, lr_0 = 1.4869e-04
Loss = 3.5062e-02, PNorm = 69.7772, GNorm = 0.4835, lr_0 = 1.4859e-04
Loss = 4.1596e-02, PNorm = 69.7794, GNorm = 0.5039, lr_0 = 1.4849e-04
Loss = 3.1094e-02, PNorm = 69.7806, GNorm = 0.4966, lr_0 = 1.4839e-04
Loss = 3.3971e-02, PNorm = 69.7821, GNorm = 0.5633, lr_0 = 1.4829e-04
Loss = 3.5322e-02, PNorm = 69.7835, GNorm = 0.8249, lr_0 = 1.4818e-04
Loss = 3.7297e-02, PNorm = 69.7852, GNorm = 0.6224, lr_0 = 1.4808e-04
Loss = 3.3341e-02, PNorm = 69.7886, GNorm = 0.4662, lr_0 = 1.4798e-04
Loss = 3.3921e-02, PNorm = 69.7912, GNorm = 0.3895, lr_0 = 1.4788e-04
Loss = 3.1975e-02, PNorm = 69.7931, GNorm = 0.5768, lr_0 = 1.4778e-04
Loss = 3.2599e-02, PNorm = 69.7957, GNorm = 0.4447, lr_0 = 1.4768e-04
Loss = 3.4624e-02, PNorm = 69.7985, GNorm = 0.4778, lr_0 = 1.4758e-04
Loss = 3.2436e-02, PNorm = 69.8012, GNorm = 0.3908, lr_0 = 1.4748e-04
Loss = 3.9250e-02, PNorm = 69.8034, GNorm = 0.8795, lr_0 = 1.4737e-04
Loss = 3.6352e-02, PNorm = 69.8051, GNorm = 0.4616, lr_0 = 1.4727e-04
Loss = 3.5680e-02, PNorm = 69.8070, GNorm = 0.4310, lr_0 = 1.4717e-04
Loss = 3.6566e-02, PNorm = 69.8097, GNorm = 0.7000, lr_0 = 1.4707e-04
Loss = 3.6254e-02, PNorm = 69.8114, GNorm = 0.4818, lr_0 = 1.4697e-04
Loss = 3.7915e-02, PNorm = 69.8136, GNorm = 0.3327, lr_0 = 1.4687e-04
Loss = 3.3372e-02, PNorm = 69.8153, GNorm = 0.6509, lr_0 = 1.4677e-04
Loss = 4.0117e-02, PNorm = 69.8167, GNorm = 0.4702, lr_0 = 1.4667e-04
Loss = 3.9246e-02, PNorm = 69.8202, GNorm = 0.5276, lr_0 = 1.4657e-04
Loss = 3.5447e-02, PNorm = 69.8234, GNorm = 0.7461, lr_0 = 1.4647e-04
Loss = 3.4299e-02, PNorm = 69.8260, GNorm = 0.5426, lr_0 = 1.4637e-04
Loss = 3.6628e-02, PNorm = 69.8287, GNorm = 0.5016, lr_0 = 1.4627e-04
Loss = 3.7271e-02, PNorm = 69.8314, GNorm = 0.6968, lr_0 = 1.4617e-04
Loss = 3.7855e-02, PNorm = 69.8343, GNorm = 0.6502, lr_0 = 1.4607e-04
Loss = 3.2737e-02, PNorm = 69.8369, GNorm = 0.4887, lr_0 = 1.4597e-04
Loss = 3.4972e-02, PNorm = 69.8377, GNorm = 0.5111, lr_0 = 1.4587e-04
Loss = 3.7290e-02, PNorm = 69.8394, GNorm = 0.4736, lr_0 = 1.4577e-04
Loss = 3.8289e-02, PNorm = 69.8426, GNorm = 0.4754, lr_0 = 1.4567e-04
Loss = 4.1628e-02, PNorm = 69.8457, GNorm = 0.6432, lr_0 = 1.4557e-04
Loss = 4.2010e-02, PNorm = 69.8464, GNorm = 0.5753, lr_0 = 1.4547e-04
Loss = 4.0590e-02, PNorm = 69.8486, GNorm = 0.8239, lr_0 = 1.4537e-04
Loss = 3.3035e-02, PNorm = 69.8516, GNorm = 0.5695, lr_0 = 1.4527e-04
Loss = 3.2079e-02, PNorm = 69.8534, GNorm = 0.4810, lr_0 = 1.4517e-04
Loss = 3.3480e-02, PNorm = 69.8547, GNorm = 0.4320, lr_0 = 1.4507e-04
Loss = 3.3590e-02, PNorm = 69.8559, GNorm = 0.3405, lr_0 = 1.4497e-04
Loss = 3.5870e-02, PNorm = 69.8579, GNorm = 0.4896, lr_0 = 1.4487e-04
Loss = 3.0435e-02, PNorm = 69.8600, GNorm = 0.4274, lr_0 = 1.4477e-04
Loss = 3.4063e-02, PNorm = 69.8623, GNorm = 0.4671, lr_0 = 1.4467e-04
Loss = 4.0742e-02, PNorm = 69.8646, GNorm = 0.4131, lr_0 = 1.4457e-04
Loss = 4.2270e-02, PNorm = 69.8663, GNorm = 0.3839, lr_0 = 1.4447e-04
Loss = 3.9116e-02, PNorm = 69.8676, GNorm = 0.4488, lr_0 = 1.4438e-04
Loss = 3.7939e-02, PNorm = 69.8700, GNorm = 0.3952, lr_0 = 1.4428e-04
Loss = 4.2818e-02, PNorm = 69.8719, GNorm = 0.5094, lr_0 = 1.4418e-04
Loss = 3.1690e-02, PNorm = 69.8731, GNorm = 0.4172, lr_0 = 1.4408e-04
Loss = 4.4742e-02, PNorm = 69.8746, GNorm = 0.5664, lr_0 = 1.4398e-04
Loss = 3.2473e-02, PNorm = 69.8766, GNorm = 0.5323, lr_0 = 1.4388e-04
Loss = 3.9472e-02, PNorm = 69.8795, GNorm = 0.4680, lr_0 = 1.4378e-04
Loss = 4.0653e-02, PNorm = 69.8823, GNorm = 0.8632, lr_0 = 1.4368e-04
Loss = 4.0249e-02, PNorm = 69.8851, GNorm = 0.4980, lr_0 = 1.4359e-04
Loss = 3.7854e-02, PNorm = 69.8875, GNorm = 0.5181, lr_0 = 1.4349e-04
Loss = 3.8489e-02, PNorm = 69.8885, GNorm = 0.5414, lr_0 = 1.4339e-04
Loss = 3.8782e-02, PNorm = 69.8900, GNorm = 0.3704, lr_0 = 1.4329e-04
Loss = 4.2222e-02, PNorm = 69.8923, GNorm = 0.4675, lr_0 = 1.4319e-04
Loss = 4.1211e-02, PNorm = 69.8950, GNorm = 0.5683, lr_0 = 1.4310e-04
Loss = 3.9996e-02, PNorm = 69.8972, GNorm = 0.4757, lr_0 = 1.4300e-04
Loss = 4.2806e-02, PNorm = 69.8990, GNorm = 0.5214, lr_0 = 1.4290e-04
Loss = 4.0528e-02, PNorm = 69.9018, GNorm = 0.5520, lr_0 = 1.4280e-04
Loss = 3.9112e-02, PNorm = 69.9041, GNorm = 0.5524, lr_0 = 1.4270e-04
Loss = 3.4937e-02, PNorm = 69.9054, GNorm = 0.4501, lr_0 = 1.4261e-04
Loss = 3.8940e-02, PNorm = 69.9057, GNorm = 0.4904, lr_0 = 1.4251e-04
Loss = 3.8189e-02, PNorm = 69.9074, GNorm = 0.5716, lr_0 = 1.4241e-04
Loss = 3.8872e-02, PNorm = 69.9099, GNorm = 0.5399, lr_0 = 1.4231e-04
Loss = 3.6194e-02, PNorm = 69.9122, GNorm = 0.5225, lr_0 = 1.4222e-04
Loss = 3.2043e-02, PNorm = 69.9146, GNorm = 0.3915, lr_0 = 1.4212e-04
Loss = 4.3938e-02, PNorm = 69.9168, GNorm = 0.5929, lr_0 = 1.4202e-04
Loss = 4.1504e-02, PNorm = 69.9189, GNorm = 0.7247, lr_0 = 1.4192e-04
Loss = 3.5729e-02, PNorm = 69.9214, GNorm = 0.7063, lr_0 = 1.4183e-04
Loss = 4.0338e-02, PNorm = 69.9246, GNorm = 0.3709, lr_0 = 1.4173e-04
Loss = 3.3835e-02, PNorm = 69.9258, GNorm = 0.5598, lr_0 = 1.4163e-04
Loss = 3.2885e-02, PNorm = 69.9272, GNorm = 0.5798, lr_0 = 1.4153e-04
Loss = 4.6536e-02, PNorm = 69.9291, GNorm = 0.5616, lr_0 = 1.4144e-04
Loss = 3.4921e-02, PNorm = 69.9314, GNorm = 0.4641, lr_0 = 1.4134e-04
Loss = 3.7525e-02, PNorm = 69.9332, GNorm = 0.5645, lr_0 = 1.4124e-04
Loss = 3.8346e-02, PNorm = 69.9345, GNorm = 0.6964, lr_0 = 1.4115e-04
Loss = 4.5421e-02, PNorm = 69.9373, GNorm = 0.3922, lr_0 = 1.4105e-04
Loss = 3.5458e-02, PNorm = 69.9393, GNorm = 0.5483, lr_0 = 1.4095e-04
Loss = 4.9024e-02, PNorm = 69.9417, GNorm = 0.5981, lr_0 = 1.4086e-04
Loss = 4.0545e-02, PNorm = 69.9441, GNorm = 0.4709, lr_0 = 1.4076e-04
Loss = 3.5505e-02, PNorm = 69.9469, GNorm = 0.6067, lr_0 = 1.4066e-04
Loss = 3.3272e-02, PNorm = 69.9488, GNorm = 0.4160, lr_0 = 1.4057e-04
Loss = 3.8871e-02, PNorm = 69.9505, GNorm = 0.4084, lr_0 = 1.4047e-04
Loss = 3.5333e-02, PNorm = 69.9527, GNorm = 0.3358, lr_0 = 1.4038e-04
Loss = 3.8577e-02, PNorm = 69.9557, GNorm = 0.4693, lr_0 = 1.4028e-04
Loss = 3.8476e-02, PNorm = 69.9571, GNorm = 0.4579, lr_0 = 1.4018e-04
Loss = 4.3168e-02, PNorm = 69.9596, GNorm = 0.4897, lr_0 = 1.4009e-04
Loss = 3.5038e-02, PNorm = 69.9617, GNorm = 0.4662, lr_0 = 1.3999e-04
Loss = 3.7522e-02, PNorm = 69.9631, GNorm = 0.4358, lr_0 = 1.3990e-04
Loss = 3.9080e-02, PNorm = 69.9658, GNorm = 0.4986, lr_0 = 1.3980e-04
Loss = 3.7459e-02, PNorm = 69.9687, GNorm = 0.5464, lr_0 = 1.3970e-04
Loss = 4.0401e-02, PNorm = 69.9703, GNorm = 0.7763, lr_0 = 1.3961e-04
Loss = 4.0148e-02, PNorm = 69.9711, GNorm = 0.4423, lr_0 = 1.3951e-04
Loss = 3.8513e-02, PNorm = 69.9731, GNorm = 0.7520, lr_0 = 1.3942e-04
Loss = 3.9476e-02, PNorm = 69.9749, GNorm = 0.5267, lr_0 = 1.3932e-04
Loss = 4.0715e-02, PNorm = 69.9757, GNorm = 0.4347, lr_0 = 1.3923e-04
Loss = 4.0570e-02, PNorm = 69.9773, GNorm = 0.6253, lr_0 = 1.3913e-04
Loss = 3.8965e-02, PNorm = 69.9791, GNorm = 0.6780, lr_0 = 1.3904e-04
Loss = 4.3043e-02, PNorm = 69.9798, GNorm = 0.5033, lr_0 = 1.3894e-04
Validation mae = 0.386718
Epoch 26
Loss = 3.4772e-02, PNorm = 69.9812, GNorm = 0.4733, lr_0 = 1.3884e-04
Loss = 3.5605e-02, PNorm = 69.9837, GNorm = 0.5214, lr_0 = 1.3875e-04
Loss = 2.8643e-02, PNorm = 69.9858, GNorm = 0.4076, lr_0 = 1.3865e-04
Loss = 3.1462e-02, PNorm = 69.9883, GNorm = 0.3736, lr_0 = 1.3856e-04
Loss = 3.2733e-02, PNorm = 69.9902, GNorm = 0.6244, lr_0 = 1.3846e-04
Loss = 3.0953e-02, PNorm = 69.9917, GNorm = 0.5718, lr_0 = 1.3837e-04
Loss = 3.4815e-02, PNorm = 69.9928, GNorm = 0.5986, lr_0 = 1.3828e-04
Loss = 3.4372e-02, PNorm = 69.9942, GNorm = 0.5752, lr_0 = 1.3818e-04
Loss = 3.2873e-02, PNorm = 69.9962, GNorm = 0.4151, lr_0 = 1.3809e-04
Loss = 3.1903e-02, PNorm = 69.9984, GNorm = 0.3867, lr_0 = 1.3799e-04
Loss = 3.5402e-02, PNorm = 70.0007, GNorm = 0.4813, lr_0 = 1.3790e-04
Loss = 3.1035e-02, PNorm = 70.0033, GNorm = 0.4345, lr_0 = 1.3780e-04
Loss = 3.3120e-02, PNorm = 70.0053, GNorm = 0.4601, lr_0 = 1.3771e-04
Loss = 3.3319e-02, PNorm = 70.0071, GNorm = 0.6201, lr_0 = 1.3761e-04
Loss = 2.7119e-02, PNorm = 70.0090, GNorm = 0.3794, lr_0 = 1.3752e-04
Loss = 4.3869e-02, PNorm = 70.0118, GNorm = 0.4184, lr_0 = 1.3742e-04
Loss = 3.3469e-02, PNorm = 70.0141, GNorm = 0.3835, lr_0 = 1.3733e-04
Loss = 3.5046e-02, PNorm = 70.0161, GNorm = 0.5524, lr_0 = 1.3724e-04
Loss = 3.8803e-02, PNorm = 70.0180, GNorm = 0.5241, lr_0 = 1.3714e-04
Loss = 3.1858e-02, PNorm = 70.0203, GNorm = 0.3787, lr_0 = 1.3705e-04
Loss = 3.3391e-02, PNorm = 70.0221, GNorm = 0.3509, lr_0 = 1.3695e-04
Loss = 3.5073e-02, PNorm = 70.0247, GNorm = 0.4375, lr_0 = 1.3686e-04
Loss = 2.8523e-02, PNorm = 70.0280, GNorm = 0.3293, lr_0 = 1.3677e-04
Loss = 3.1417e-02, PNorm = 70.0300, GNorm = 0.4633, lr_0 = 1.3667e-04
Loss = 3.1828e-02, PNorm = 70.0323, GNorm = 0.5022, lr_0 = 1.3658e-04
Loss = 3.3689e-02, PNorm = 70.0338, GNorm = 0.6531, lr_0 = 1.3649e-04
Loss = 3.2832e-02, PNorm = 70.0358, GNorm = 0.6654, lr_0 = 1.3639e-04
Loss = 3.1107e-02, PNorm = 70.0378, GNorm = 0.3153, lr_0 = 1.3630e-04
Loss = 3.7531e-02, PNorm = 70.0395, GNorm = 0.4626, lr_0 = 1.3621e-04
Loss = 2.9988e-02, PNorm = 70.0419, GNorm = 0.4284, lr_0 = 1.3611e-04
Loss = 3.0401e-02, PNorm = 70.0454, GNorm = 0.4863, lr_0 = 1.3602e-04
Loss = 3.9569e-02, PNorm = 70.0470, GNorm = 0.5455, lr_0 = 1.3593e-04
Loss = 3.2304e-02, PNorm = 70.0482, GNorm = 0.3955, lr_0 = 1.3583e-04
Loss = 3.4091e-02, PNorm = 70.0503, GNorm = 0.5292, lr_0 = 1.3574e-04
Loss = 3.1075e-02, PNorm = 70.0512, GNorm = 0.4990, lr_0 = 1.3565e-04
Loss = 3.6198e-02, PNorm = 70.0528, GNorm = 0.5428, lr_0 = 1.3555e-04
Loss = 2.6325e-02, PNorm = 70.0535, GNorm = 0.6688, lr_0 = 1.3546e-04
Loss = 2.9598e-02, PNorm = 70.0544, GNorm = 0.4504, lr_0 = 1.3537e-04
Loss = 3.3075e-02, PNorm = 70.0559, GNorm = 0.4921, lr_0 = 1.3528e-04
Loss = 3.7472e-02, PNorm = 70.0572, GNorm = 0.4063, lr_0 = 1.3518e-04
Loss = 3.0478e-02, PNorm = 70.0584, GNorm = 0.3389, lr_0 = 1.3509e-04
Loss = 3.5147e-02, PNorm = 70.0605, GNorm = 0.4389, lr_0 = 1.3500e-04
Loss = 3.3737e-02, PNorm = 70.0621, GNorm = 0.3524, lr_0 = 1.3491e-04
Loss = 3.4913e-02, PNorm = 70.0635, GNorm = 0.5353, lr_0 = 1.3481e-04
Loss = 3.3698e-02, PNorm = 70.0643, GNorm = 0.3918, lr_0 = 1.3472e-04
Loss = 3.7812e-02, PNorm = 70.0674, GNorm = 0.5956, lr_0 = 1.3463e-04
Loss = 3.4055e-02, PNorm = 70.0714, GNorm = 0.7391, lr_0 = 1.3454e-04
Loss = 3.1412e-02, PNorm = 70.0736, GNorm = 0.3998, lr_0 = 1.3444e-04
Loss = 4.1451e-02, PNorm = 70.0747, GNorm = 0.5123, lr_0 = 1.3435e-04
Loss = 3.7399e-02, PNorm = 70.0765, GNorm = 0.7033, lr_0 = 1.3426e-04
Loss = 4.3938e-02, PNorm = 70.0802, GNorm = 0.3972, lr_0 = 1.3417e-04
Loss = 3.8130e-02, PNorm = 70.0821, GNorm = 0.4073, lr_0 = 1.3408e-04
Loss = 3.5403e-02, PNorm = 70.0849, GNorm = 0.5458, lr_0 = 1.3398e-04
Loss = 3.4844e-02, PNorm = 70.0865, GNorm = 0.5575, lr_0 = 1.3389e-04
Loss = 3.9083e-02, PNorm = 70.0889, GNorm = 0.4583, lr_0 = 1.3380e-04
Loss = 3.9229e-02, PNorm = 70.0913, GNorm = 0.4142, lr_0 = 1.3371e-04
Loss = 3.4815e-02, PNorm = 70.0933, GNorm = 0.4784, lr_0 = 1.3362e-04
Loss = 3.3551e-02, PNorm = 70.0953, GNorm = 0.4998, lr_0 = 1.3353e-04
Loss = 3.7329e-02, PNorm = 70.0967, GNorm = 0.5374, lr_0 = 1.3343e-04
Loss = 3.5178e-02, PNorm = 70.0975, GNorm = 0.5434, lr_0 = 1.3334e-04
Loss = 3.3979e-02, PNorm = 70.0987, GNorm = 0.6226, lr_0 = 1.3325e-04
Loss = 3.3482e-02, PNorm = 70.1004, GNorm = 0.4660, lr_0 = 1.3316e-04
Loss = 4.1137e-02, PNorm = 70.1024, GNorm = 0.5849, lr_0 = 1.3307e-04
Loss = 3.2462e-02, PNorm = 70.1049, GNorm = 0.4659, lr_0 = 1.3298e-04
Loss = 3.3899e-02, PNorm = 70.1073, GNorm = 0.4669, lr_0 = 1.3289e-04
Loss = 4.0863e-02, PNorm = 70.1082, GNorm = 0.5805, lr_0 = 1.3280e-04
Loss = 3.3849e-02, PNorm = 70.1094, GNorm = 0.6267, lr_0 = 1.3270e-04
Loss = 3.2474e-02, PNorm = 70.1107, GNorm = 0.3537, lr_0 = 1.3261e-04
Loss = 3.7246e-02, PNorm = 70.1123, GNorm = 0.4506, lr_0 = 1.3252e-04
Loss = 3.4623e-02, PNorm = 70.1138, GNorm = 0.4632, lr_0 = 1.3243e-04
Loss = 3.2369e-02, PNorm = 70.1151, GNorm = 0.5247, lr_0 = 1.3234e-04
Loss = 3.3124e-02, PNorm = 70.1169, GNorm = 0.6105, lr_0 = 1.3225e-04
Loss = 3.3604e-02, PNorm = 70.1187, GNorm = 0.4014, lr_0 = 1.3216e-04
Loss = 3.7377e-02, PNorm = 70.1202, GNorm = 0.7206, lr_0 = 1.3207e-04
Loss = 3.2611e-02, PNorm = 70.1220, GNorm = 0.4255, lr_0 = 1.3198e-04
Loss = 3.2087e-02, PNorm = 70.1239, GNorm = 0.6286, lr_0 = 1.3189e-04
Loss = 3.3780e-02, PNorm = 70.1255, GNorm = 0.4652, lr_0 = 1.3180e-04
Loss = 3.6993e-02, PNorm = 70.1275, GNorm = 0.6341, lr_0 = 1.3171e-04
Loss = 3.5593e-02, PNorm = 70.1291, GNorm = 0.5306, lr_0 = 1.3162e-04
Loss = 3.5967e-02, PNorm = 70.1300, GNorm = 0.5166, lr_0 = 1.3153e-04
Loss = 2.8739e-02, PNorm = 70.1306, GNorm = 0.3945, lr_0 = 1.3144e-04
Loss = 4.0539e-02, PNorm = 70.1327, GNorm = 0.7839, lr_0 = 1.3135e-04
Loss = 3.2007e-02, PNorm = 70.1361, GNorm = 0.3956, lr_0 = 1.3126e-04
Loss = 3.2840e-02, PNorm = 70.1388, GNorm = 0.6139, lr_0 = 1.3117e-04
Loss = 3.3958e-02, PNorm = 70.1410, GNorm = 0.5231, lr_0 = 1.3108e-04
Loss = 3.1593e-02, PNorm = 70.1427, GNorm = 0.3660, lr_0 = 1.3099e-04
Loss = 3.9525e-02, PNorm = 70.1452, GNorm = 0.5314, lr_0 = 1.3090e-04
Loss = 4.1271e-02, PNorm = 70.1476, GNorm = 0.7207, lr_0 = 1.3081e-04
Loss = 3.4180e-02, PNorm = 70.1484, GNorm = 0.4432, lr_0 = 1.3072e-04
Loss = 3.5872e-02, PNorm = 70.1488, GNorm = 0.8694, lr_0 = 1.3063e-04
Loss = 3.0416e-02, PNorm = 70.1508, GNorm = 0.4542, lr_0 = 1.3054e-04
Loss = 3.4552e-02, PNorm = 70.1533, GNorm = 0.4157, lr_0 = 1.3045e-04
Loss = 3.5324e-02, PNorm = 70.1563, GNorm = 0.6593, lr_0 = 1.3036e-04
Loss = 3.7841e-02, PNorm = 70.1584, GNorm = 0.5111, lr_0 = 1.3027e-04
Loss = 3.4410e-02, PNorm = 70.1598, GNorm = 0.5359, lr_0 = 1.3018e-04
Loss = 3.5158e-02, PNorm = 70.1631, GNorm = 0.5376, lr_0 = 1.3009e-04
Loss = 3.3669e-02, PNorm = 70.1654, GNorm = 0.4304, lr_0 = 1.3000e-04
Loss = 3.4377e-02, PNorm = 70.1671, GNorm = 0.6428, lr_0 = 1.2992e-04
Loss = 3.7408e-02, PNorm = 70.1691, GNorm = 0.4519, lr_0 = 1.2983e-04
Loss = 3.3300e-02, PNorm = 70.1704, GNorm = 0.6944, lr_0 = 1.2974e-04
Loss = 3.7867e-02, PNorm = 70.1712, GNorm = 0.5526, lr_0 = 1.2965e-04
Loss = 4.2714e-02, PNorm = 70.1732, GNorm = 0.6834, lr_0 = 1.2956e-04
Loss = 3.7418e-02, PNorm = 70.1750, GNorm = 0.5624, lr_0 = 1.2947e-04
Loss = 3.6431e-02, PNorm = 70.1770, GNorm = 0.5367, lr_0 = 1.2938e-04
Loss = 3.5787e-02, PNorm = 70.1788, GNorm = 0.4183, lr_0 = 1.2929e-04
Loss = 3.8677e-02, PNorm = 70.1808, GNorm = 0.4928, lr_0 = 1.2921e-04
Loss = 3.5451e-02, PNorm = 70.1825, GNorm = 0.5299, lr_0 = 1.2912e-04
Loss = 3.1974e-02, PNorm = 70.1852, GNorm = 0.4715, lr_0 = 1.2903e-04
Loss = 3.7448e-02, PNorm = 70.1876, GNorm = 0.5254, lr_0 = 1.2894e-04
Loss = 4.7506e-02, PNorm = 70.1898, GNorm = 0.4948, lr_0 = 1.2885e-04
Loss = 3.2047e-02, PNorm = 70.1925, GNorm = 0.6683, lr_0 = 1.2876e-04
Loss = 3.6230e-02, PNorm = 70.1941, GNorm = 0.6143, lr_0 = 1.2867e-04
Loss = 4.2478e-02, PNorm = 70.1965, GNorm = 0.7576, lr_0 = 1.2859e-04
Loss = 3.7976e-02, PNorm = 70.1988, GNorm = 0.6077, lr_0 = 1.2850e-04
Loss = 3.2156e-02, PNorm = 70.2012, GNorm = 0.4662, lr_0 = 1.2841e-04
Loss = 3.9356e-02, PNorm = 70.2033, GNorm = 0.6406, lr_0 = 1.2832e-04
Loss = 3.7947e-02, PNorm = 70.2044, GNorm = 0.4528, lr_0 = 1.2823e-04
Loss = 4.9691e-02, PNorm = 70.2074, GNorm = 0.5723, lr_0 = 1.2815e-04
Loss = 3.1954e-02, PNorm = 70.2090, GNorm = 0.5584, lr_0 = 1.2806e-04
Loss = 3.7507e-02, PNorm = 70.2104, GNorm = 0.4756, lr_0 = 1.2797e-04
Validation mae = 0.390576
Epoch 27
Loss = 3.0459e-02, PNorm = 70.2122, GNorm = 0.3439, lr_0 = 1.2788e-04
Loss = 3.2279e-02, PNorm = 70.2142, GNorm = 0.5478, lr_0 = 1.2780e-04
Loss = 3.1959e-02, PNorm = 70.2162, GNorm = 0.4311, lr_0 = 1.2771e-04
Loss = 2.6854e-02, PNorm = 70.2181, GNorm = 0.4273, lr_0 = 1.2762e-04
Loss = 3.4676e-02, PNorm = 70.2188, GNorm = 0.4252, lr_0 = 1.2753e-04
Loss = 3.1596e-02, PNorm = 70.2196, GNorm = 0.5030, lr_0 = 1.2745e-04
Loss = 3.3402e-02, PNorm = 70.2224, GNorm = 0.4138, lr_0 = 1.2736e-04
Loss = 3.2272e-02, PNorm = 70.2245, GNorm = 0.5223, lr_0 = 1.2727e-04
Loss = 3.3032e-02, PNorm = 70.2272, GNorm = 0.4996, lr_0 = 1.2718e-04
Loss = 3.4190e-02, PNorm = 70.2295, GNorm = 0.5469, lr_0 = 1.2710e-04
Loss = 3.1129e-02, PNorm = 70.2322, GNorm = 0.3579, lr_0 = 1.2701e-04
Loss = 2.8248e-02, PNorm = 70.2340, GNorm = 0.3533, lr_0 = 1.2692e-04
Loss = 2.6097e-02, PNorm = 70.2356, GNorm = 0.3979, lr_0 = 1.2684e-04
Loss = 3.0498e-02, PNorm = 70.2365, GNorm = 0.6433, lr_0 = 1.2675e-04
Loss = 3.6782e-02, PNorm = 70.2376, GNorm = 0.3848, lr_0 = 1.2666e-04
Loss = 2.6980e-02, PNorm = 70.2390, GNorm = 0.3836, lr_0 = 1.2658e-04
Loss = 2.9010e-02, PNorm = 70.2402, GNorm = 0.5223, lr_0 = 1.2649e-04
Loss = 3.5287e-02, PNorm = 70.2418, GNorm = 0.5362, lr_0 = 1.2640e-04
Loss = 3.0964e-02, PNorm = 70.2433, GNorm = 0.4231, lr_0 = 1.2632e-04
Loss = 3.4357e-02, PNorm = 70.2448, GNorm = 0.3587, lr_0 = 1.2623e-04
Loss = 3.3471e-02, PNorm = 70.2475, GNorm = 0.5165, lr_0 = 1.2614e-04
Loss = 2.9079e-02, PNorm = 70.2502, GNorm = 0.4851, lr_0 = 1.2606e-04
Loss = 3.4880e-02, PNorm = 70.2523, GNorm = 0.5447, lr_0 = 1.2597e-04
Loss = 3.1296e-02, PNorm = 70.2542, GNorm = 0.5573, lr_0 = 1.2588e-04
Loss = 3.7026e-02, PNorm = 70.2561, GNorm = 0.6271, lr_0 = 1.2580e-04
Loss = 3.5623e-02, PNorm = 70.2579, GNorm = 0.5009, lr_0 = 1.2571e-04
Loss = 3.4585e-02, PNorm = 70.2607, GNorm = 0.6039, lr_0 = 1.2563e-04
Loss = 3.1029e-02, PNorm = 70.2623, GNorm = 0.5943, lr_0 = 1.2554e-04
Loss = 3.1475e-02, PNorm = 70.2634, GNorm = 0.3703, lr_0 = 1.2545e-04
Loss = 3.0769e-02, PNorm = 70.2647, GNorm = 0.4182, lr_0 = 1.2537e-04
Loss = 3.3100e-02, PNorm = 70.2667, GNorm = 0.6864, lr_0 = 1.2528e-04
Loss = 3.6523e-02, PNorm = 70.2690, GNorm = 0.4966, lr_0 = 1.2520e-04
Loss = 2.8770e-02, PNorm = 70.2713, GNorm = 0.5305, lr_0 = 1.2511e-04
Loss = 3.9471e-02, PNorm = 70.2728, GNorm = 0.5065, lr_0 = 1.2502e-04
Loss = 3.3835e-02, PNorm = 70.2742, GNorm = 0.3556, lr_0 = 1.2494e-04
Loss = 3.0912e-02, PNorm = 70.2764, GNorm = 0.4820, lr_0 = 1.2485e-04
Loss = 2.9834e-02, PNorm = 70.2785, GNorm = 0.6109, lr_0 = 1.2477e-04
Loss = 3.3616e-02, PNorm = 70.2807, GNorm = 0.6170, lr_0 = 1.2468e-04
Loss = 3.4520e-02, PNorm = 70.2835, GNorm = 0.5954, lr_0 = 1.2460e-04
Loss = 3.1302e-02, PNorm = 70.2862, GNorm = 0.3489, lr_0 = 1.2451e-04
Loss = 3.4954e-02, PNorm = 70.2888, GNorm = 0.4808, lr_0 = 1.2443e-04
Loss = 3.4530e-02, PNorm = 70.2901, GNorm = 0.2873, lr_0 = 1.2434e-04
Loss = 3.4603e-02, PNorm = 70.2906, GNorm = 0.4990, lr_0 = 1.2426e-04
Loss = 3.1607e-02, PNorm = 70.2911, GNorm = 0.8440, lr_0 = 1.2417e-04
Loss = 2.9049e-02, PNorm = 70.2932, GNorm = 0.5991, lr_0 = 1.2409e-04
Loss = 2.7692e-02, PNorm = 70.2943, GNorm = 0.3956, lr_0 = 1.2400e-04
Loss = 3.0704e-02, PNorm = 70.2953, GNorm = 0.3742, lr_0 = 1.2392e-04
Loss = 3.3427e-02, PNorm = 70.2965, GNorm = 0.7468, lr_0 = 1.2383e-04
Loss = 3.5710e-02, PNorm = 70.2987, GNorm = 0.6406, lr_0 = 1.2375e-04
Loss = 3.3575e-02, PNorm = 70.3010, GNorm = 0.4836, lr_0 = 1.2366e-04
Loss = 3.1449e-02, PNorm = 70.3029, GNorm = 0.5259, lr_0 = 1.2358e-04
Loss = 3.2738e-02, PNorm = 70.3046, GNorm = 0.5398, lr_0 = 1.2349e-04
Loss = 3.4788e-02, PNorm = 70.3068, GNorm = 0.3795, lr_0 = 1.2341e-04
Loss = 3.4124e-02, PNorm = 70.3089, GNorm = 0.3384, lr_0 = 1.2332e-04
Loss = 3.4186e-02, PNorm = 70.3100, GNorm = 0.5231, lr_0 = 1.2324e-04
Loss = 3.4815e-02, PNorm = 70.3112, GNorm = 0.5246, lr_0 = 1.2315e-04
Loss = 3.1101e-02, PNorm = 70.3123, GNorm = 0.4578, lr_0 = 1.2307e-04
Loss = 3.6301e-02, PNorm = 70.3141, GNorm = 0.8188, lr_0 = 1.2298e-04
Loss = 3.0769e-02, PNorm = 70.3164, GNorm = 0.5625, lr_0 = 1.2290e-04
Loss = 3.5252e-02, PNorm = 70.3178, GNorm = 0.3765, lr_0 = 1.2282e-04
Loss = 3.8906e-02, PNorm = 70.3195, GNorm = 0.5567, lr_0 = 1.2273e-04
Loss = 3.5898e-02, PNorm = 70.3224, GNorm = 0.3608, lr_0 = 1.2265e-04
Loss = 3.5799e-02, PNorm = 70.3249, GNorm = 0.3823, lr_0 = 1.2256e-04
Loss = 3.5717e-02, PNorm = 70.3266, GNorm = 0.4184, lr_0 = 1.2248e-04
Loss = 2.8500e-02, PNorm = 70.3274, GNorm = 0.3649, lr_0 = 1.2240e-04
Loss = 3.2606e-02, PNorm = 70.3282, GNorm = 0.4911, lr_0 = 1.2231e-04
Loss = 3.3154e-02, PNorm = 70.3291, GNorm = 0.5382, lr_0 = 1.2223e-04
Loss = 3.1368e-02, PNorm = 70.3308, GNorm = 0.4298, lr_0 = 1.2214e-04
Loss = 3.1097e-02, PNorm = 70.3317, GNorm = 0.4680, lr_0 = 1.2206e-04
Loss = 3.3397e-02, PNorm = 70.3336, GNorm = 0.6247, lr_0 = 1.2198e-04
Loss = 3.2505e-02, PNorm = 70.3360, GNorm = 0.4686, lr_0 = 1.2189e-04
Loss = 3.1558e-02, PNorm = 70.3378, GNorm = 0.4688, lr_0 = 1.2181e-04
Loss = 2.7700e-02, PNorm = 70.3403, GNorm = 0.3954, lr_0 = 1.2173e-04
Loss = 3.1457e-02, PNorm = 70.3423, GNorm = 0.5468, lr_0 = 1.2164e-04
Loss = 3.2910e-02, PNorm = 70.3428, GNorm = 0.5046, lr_0 = 1.2156e-04
Loss = 3.3356e-02, PNorm = 70.3439, GNorm = 0.3594, lr_0 = 1.2148e-04
Loss = 2.6757e-02, PNorm = 70.3449, GNorm = 0.4247, lr_0 = 1.2139e-04
Loss = 3.5760e-02, PNorm = 70.3460, GNorm = 0.4554, lr_0 = 1.2131e-04
Loss = 3.6357e-02, PNorm = 70.3476, GNorm = 0.4577, lr_0 = 1.2123e-04
Loss = 3.3528e-02, PNorm = 70.3494, GNorm = 0.4059, lr_0 = 1.2114e-04
Loss = 3.1759e-02, PNorm = 70.3503, GNorm = 0.4982, lr_0 = 1.2106e-04
Loss = 3.9691e-02, PNorm = 70.3513, GNorm = 0.4991, lr_0 = 1.2098e-04
Loss = 3.8202e-02, PNorm = 70.3536, GNorm = 0.6146, lr_0 = 1.2090e-04
Loss = 3.4019e-02, PNorm = 70.3554, GNorm = 0.5374, lr_0 = 1.2081e-04
Loss = 3.5642e-02, PNorm = 70.3572, GNorm = 0.5566, lr_0 = 1.2073e-04
Loss = 3.2407e-02, PNorm = 70.3588, GNorm = 0.7489, lr_0 = 1.2065e-04
Loss = 3.0316e-02, PNorm = 70.3600, GNorm = 0.6370, lr_0 = 1.2056e-04
Loss = 3.2104e-02, PNorm = 70.3619, GNorm = 0.6639, lr_0 = 1.2048e-04
Loss = 4.0091e-02, PNorm = 70.3645, GNorm = 0.6410, lr_0 = 1.2040e-04
Loss = 3.8965e-02, PNorm = 70.3657, GNorm = 0.4833, lr_0 = 1.2032e-04
Loss = 3.6596e-02, PNorm = 70.3665, GNorm = 0.5890, lr_0 = 1.2023e-04
Loss = 2.8827e-02, PNorm = 70.3674, GNorm = 0.5754, lr_0 = 1.2015e-04
Loss = 3.8892e-02, PNorm = 70.3693, GNorm = 0.4856, lr_0 = 1.2007e-04
Loss = 3.1767e-02, PNorm = 70.3720, GNorm = 0.5064, lr_0 = 1.1999e-04
Loss = 3.5356e-02, PNorm = 70.3724, GNorm = 0.4900, lr_0 = 1.1991e-04
Loss = 3.4401e-02, PNorm = 70.3740, GNorm = 0.5885, lr_0 = 1.1982e-04
Loss = 3.8100e-02, PNorm = 70.3763, GNorm = 0.5686, lr_0 = 1.1974e-04
Loss = 3.0634e-02, PNorm = 70.3782, GNorm = 0.4335, lr_0 = 1.1966e-04
Loss = 2.9484e-02, PNorm = 70.3797, GNorm = 0.3888, lr_0 = 1.1958e-04
Loss = 3.5274e-02, PNorm = 70.3801, GNorm = 0.5133, lr_0 = 1.1950e-04
Loss = 3.6334e-02, PNorm = 70.3806, GNorm = 0.4807, lr_0 = 1.1941e-04
Loss = 3.2028e-02, PNorm = 70.3822, GNorm = 0.3752, lr_0 = 1.1933e-04
Loss = 2.9991e-02, PNorm = 70.3844, GNorm = 0.4371, lr_0 = 1.1925e-04
Loss = 3.4817e-02, PNorm = 70.3875, GNorm = 0.4221, lr_0 = 1.1917e-04
Loss = 3.2125e-02, PNorm = 70.3881, GNorm = 0.5442, lr_0 = 1.1909e-04
Loss = 3.5982e-02, PNorm = 70.3900, GNorm = 0.5443, lr_0 = 1.1901e-04
Loss = 3.9885e-02, PNorm = 70.3922, GNorm = 0.5190, lr_0 = 1.1892e-04
Loss = 3.6242e-02, PNorm = 70.3944, GNorm = 0.5150, lr_0 = 1.1884e-04
Loss = 3.9001e-02, PNorm = 70.3968, GNorm = 0.5083, lr_0 = 1.1876e-04
Loss = 3.3119e-02, PNorm = 70.3987, GNorm = 0.3610, lr_0 = 1.1868e-04
Loss = 3.0591e-02, PNorm = 70.4004, GNorm = 0.4096, lr_0 = 1.1860e-04
Loss = 3.5180e-02, PNorm = 70.4015, GNorm = 0.4349, lr_0 = 1.1852e-04
Loss = 3.1966e-02, PNorm = 70.4031, GNorm = 0.3499, lr_0 = 1.1844e-04
Loss = 3.4187e-02, PNorm = 70.4044, GNorm = 0.5852, lr_0 = 1.1835e-04
Loss = 3.8307e-02, PNorm = 70.4059, GNorm = 0.5604, lr_0 = 1.1827e-04
Loss = 3.4240e-02, PNorm = 70.4078, GNorm = 0.3343, lr_0 = 1.1819e-04
Loss = 3.2986e-02, PNorm = 70.4100, GNorm = 0.3533, lr_0 = 1.1811e-04
Loss = 3.4428e-02, PNorm = 70.4117, GNorm = 0.3609, lr_0 = 1.1803e-04
Loss = 4.1999e-02, PNorm = 70.4131, GNorm = 0.5886, lr_0 = 1.1795e-04
Loss = 4.0463e-02, PNorm = 70.4152, GNorm = 0.5781, lr_0 = 1.1787e-04
Validation mae = 0.391591
Epoch 28
Loss = 3.1526e-02, PNorm = 70.4169, GNorm = 0.6573, lr_0 = 1.1779e-04
Loss = 3.4157e-02, PNorm = 70.4192, GNorm = 0.3824, lr_0 = 1.1771e-04
Loss = 2.8587e-02, PNorm = 70.4208, GNorm = 0.6293, lr_0 = 1.1763e-04
Loss = 3.1940e-02, PNorm = 70.4214, GNorm = 0.6656, lr_0 = 1.1755e-04
Loss = 2.9470e-02, PNorm = 70.4225, GNorm = 0.4905, lr_0 = 1.1747e-04
Loss = 3.0479e-02, PNorm = 70.4243, GNorm = 0.3956, lr_0 = 1.1739e-04
Loss = 3.2442e-02, PNorm = 70.4262, GNorm = 0.6369, lr_0 = 1.1730e-04
Loss = 2.9761e-02, PNorm = 70.4284, GNorm = 0.5103, lr_0 = 1.1722e-04
Loss = 2.9066e-02, PNorm = 70.4297, GNorm = 0.4880, lr_0 = 1.1714e-04
Loss = 3.1298e-02, PNorm = 70.4311, GNorm = 0.4943, lr_0 = 1.1706e-04
Loss = 2.7777e-02, PNorm = 70.4322, GNorm = 0.3103, lr_0 = 1.1698e-04
Loss = 2.9052e-02, PNorm = 70.4328, GNorm = 0.4442, lr_0 = 1.1690e-04
Loss = 3.6742e-02, PNorm = 70.4353, GNorm = 0.3537, lr_0 = 1.1682e-04
Loss = 3.0385e-02, PNorm = 70.4375, GNorm = 0.3845, lr_0 = 1.1674e-04
Loss = 2.8701e-02, PNorm = 70.4387, GNorm = 0.4155, lr_0 = 1.1666e-04
Loss = 3.1054e-02, PNorm = 70.4406, GNorm = 0.5704, lr_0 = 1.1658e-04
Loss = 3.6066e-02, PNorm = 70.4436, GNorm = 0.3566, lr_0 = 1.1650e-04
Loss = 3.1277e-02, PNorm = 70.4461, GNorm = 0.8252, lr_0 = 1.1642e-04
Loss = 2.7521e-02, PNorm = 70.4484, GNorm = 0.3573, lr_0 = 1.1634e-04
Loss = 3.5214e-02, PNorm = 70.4507, GNorm = 0.5045, lr_0 = 1.1626e-04
Loss = 3.3329e-02, PNorm = 70.4529, GNorm = 0.9268, lr_0 = 1.1618e-04
Loss = 3.4182e-02, PNorm = 70.4545, GNorm = 0.8620, lr_0 = 1.1611e-04
Loss = 3.0586e-02, PNorm = 70.4563, GNorm = 0.4047, lr_0 = 1.1603e-04
Loss = 3.3020e-02, PNorm = 70.4580, GNorm = 0.5997, lr_0 = 1.1595e-04
Loss = 3.3777e-02, PNorm = 70.4599, GNorm = 0.5598, lr_0 = 1.1587e-04
Loss = 3.2265e-02, PNorm = 70.4621, GNorm = 0.4080, lr_0 = 1.1579e-04
Loss = 3.3023e-02, PNorm = 70.4640, GNorm = 0.5946, lr_0 = 1.1571e-04
Loss = 3.3460e-02, PNorm = 70.4666, GNorm = 0.4035, lr_0 = 1.1563e-04
Loss = 3.0076e-02, PNorm = 70.4688, GNorm = 0.4531, lr_0 = 1.1555e-04
Loss = 3.1260e-02, PNorm = 70.4713, GNorm = 0.5056, lr_0 = 1.1547e-04
Loss = 3.1984e-02, PNorm = 70.4731, GNorm = 0.5258, lr_0 = 1.1539e-04
Loss = 3.2088e-02, PNorm = 70.4746, GNorm = 0.6759, lr_0 = 1.1531e-04
Loss = 2.8908e-02, PNorm = 70.4763, GNorm = 0.5138, lr_0 = 1.1523e-04
Loss = 3.2096e-02, PNorm = 70.4777, GNorm = 0.5297, lr_0 = 1.1515e-04
Loss = 3.3427e-02, PNorm = 70.4791, GNorm = 0.5760, lr_0 = 1.1508e-04
Loss = 2.5982e-02, PNorm = 70.4806, GNorm = 0.4509, lr_0 = 1.1500e-04
Loss = 2.9979e-02, PNorm = 70.4813, GNorm = 0.4020, lr_0 = 1.1492e-04
Loss = 3.0259e-02, PNorm = 70.4827, GNorm = 0.4416, lr_0 = 1.1484e-04
Loss = 3.5395e-02, PNorm = 70.4840, GNorm = 0.4120, lr_0 = 1.1476e-04
Loss = 2.7820e-02, PNorm = 70.4843, GNorm = 0.3827, lr_0 = 1.1468e-04
Loss = 3.0151e-02, PNorm = 70.4858, GNorm = 0.4119, lr_0 = 1.1460e-04
Loss = 2.9901e-02, PNorm = 70.4873, GNorm = 0.4795, lr_0 = 1.1452e-04
Loss = 2.9066e-02, PNorm = 70.4894, GNorm = 0.6083, lr_0 = 1.1445e-04
Loss = 3.5615e-02, PNorm = 70.4912, GNorm = 0.4890, lr_0 = 1.1437e-04
Loss = 2.6915e-02, PNorm = 70.4934, GNorm = 0.4310, lr_0 = 1.1429e-04
Loss = 3.4752e-02, PNorm = 70.4943, GNorm = 0.5194, lr_0 = 1.1421e-04
Loss = 2.8843e-02, PNorm = 70.4942, GNorm = 0.4003, lr_0 = 1.1413e-04
Loss = 3.3472e-02, PNorm = 70.4956, GNorm = 0.4275, lr_0 = 1.1405e-04
Loss = 3.4528e-02, PNorm = 70.4979, GNorm = 0.6678, lr_0 = 1.1398e-04
Loss = 3.0517e-02, PNorm = 70.4996, GNorm = 0.6379, lr_0 = 1.1390e-04
Loss = 2.8646e-02, PNorm = 70.5010, GNorm = 0.4152, lr_0 = 1.1382e-04
Loss = 2.8294e-02, PNorm = 70.5010, GNorm = 0.4690, lr_0 = 1.1374e-04
Loss = 3.0563e-02, PNorm = 70.5015, GNorm = 0.5818, lr_0 = 1.1366e-04
Loss = 2.9384e-02, PNorm = 70.5038, GNorm = 0.4984, lr_0 = 1.1359e-04
Loss = 2.9701e-02, PNorm = 70.5052, GNorm = 0.6619, lr_0 = 1.1351e-04
Loss = 3.1849e-02, PNorm = 70.5063, GNorm = 0.4014, lr_0 = 1.1343e-04
Loss = 2.7944e-02, PNorm = 70.5071, GNorm = 0.4322, lr_0 = 1.1335e-04
Loss = 3.2323e-02, PNorm = 70.5090, GNorm = 0.4544, lr_0 = 1.1328e-04
Loss = 2.9581e-02, PNorm = 70.5110, GNorm = 0.3991, lr_0 = 1.1320e-04
Loss = 3.0596e-02, PNorm = 70.5126, GNorm = 0.4606, lr_0 = 1.1312e-04
Loss = 3.2706e-02, PNorm = 70.5141, GNorm = 0.5877, lr_0 = 1.1304e-04
Loss = 2.5644e-02, PNorm = 70.5158, GNorm = 0.3737, lr_0 = 1.1297e-04
Loss = 2.9271e-02, PNorm = 70.5169, GNorm = 0.4566, lr_0 = 1.1289e-04
Loss = 2.8430e-02, PNorm = 70.5182, GNorm = 0.4106, lr_0 = 1.1281e-04
Loss = 3.2193e-02, PNorm = 70.5194, GNorm = 0.3590, lr_0 = 1.1273e-04
Loss = 3.2378e-02, PNorm = 70.5213, GNorm = 0.4357, lr_0 = 1.1266e-04
Loss = 2.9983e-02, PNorm = 70.5230, GNorm = 0.3892, lr_0 = 1.1258e-04
Loss = 3.5768e-02, PNorm = 70.5247, GNorm = 0.4673, lr_0 = 1.1250e-04
Loss = 3.3697e-02, PNorm = 70.5265, GNorm = 0.4651, lr_0 = 1.1243e-04
Loss = 3.2981e-02, PNorm = 70.5290, GNorm = 0.6088, lr_0 = 1.1235e-04
Loss = 3.7570e-02, PNorm = 70.5312, GNorm = 0.5212, lr_0 = 1.1227e-04
Loss = 3.2111e-02, PNorm = 70.5325, GNorm = 0.8109, lr_0 = 1.1219e-04
Loss = 3.3477e-02, PNorm = 70.5332, GNorm = 0.5042, lr_0 = 1.1212e-04
Loss = 3.0792e-02, PNorm = 70.5343, GNorm = 0.5367, lr_0 = 1.1204e-04
Loss = 2.9955e-02, PNorm = 70.5359, GNorm = 0.4750, lr_0 = 1.1196e-04
Loss = 3.0175e-02, PNorm = 70.5374, GNorm = 0.4926, lr_0 = 1.1189e-04
Loss = 3.1347e-02, PNorm = 70.5393, GNorm = 0.5310, lr_0 = 1.1181e-04
Loss = 2.9951e-02, PNorm = 70.5398, GNorm = 0.4622, lr_0 = 1.1173e-04
Loss = 3.0166e-02, PNorm = 70.5405, GNorm = 0.3822, lr_0 = 1.1166e-04
Loss = 3.6527e-02, PNorm = 70.5417, GNorm = 0.4770, lr_0 = 1.1158e-04
Loss = 3.1771e-02, PNorm = 70.5423, GNorm = 0.5014, lr_0 = 1.1150e-04
Loss = 3.4262e-02, PNorm = 70.5435, GNorm = 0.5933, lr_0 = 1.1143e-04
Loss = 3.3241e-02, PNorm = 70.5453, GNorm = 0.4717, lr_0 = 1.1135e-04
Loss = 3.1059e-02, PNorm = 70.5473, GNorm = 0.3843, lr_0 = 1.1128e-04
Loss = 3.0469e-02, PNorm = 70.5484, GNorm = 0.3951, lr_0 = 1.1120e-04
Loss = 3.1121e-02, PNorm = 70.5498, GNorm = 0.5125, lr_0 = 1.1112e-04
Loss = 2.7458e-02, PNorm = 70.5505, GNorm = 0.3762, lr_0 = 1.1105e-04
Loss = 3.4415e-02, PNorm = 70.5515, GNorm = 0.4687, lr_0 = 1.1097e-04
Loss = 3.1444e-02, PNorm = 70.5518, GNorm = 0.4702, lr_0 = 1.1089e-04
Loss = 3.0066e-02, PNorm = 70.5529, GNorm = 0.4286, lr_0 = 1.1082e-04
Loss = 3.2697e-02, PNorm = 70.5538, GNorm = 0.6219, lr_0 = 1.1074e-04
Loss = 3.3156e-02, PNorm = 70.5551, GNorm = 0.4763, lr_0 = 1.1067e-04
Loss = 3.0722e-02, PNorm = 70.5574, GNorm = 0.4171, lr_0 = 1.1059e-04
Loss = 3.6991e-02, PNorm = 70.5597, GNorm = 0.5138, lr_0 = 1.1052e-04
Loss = 3.3488e-02, PNorm = 70.5600, GNorm = 0.7728, lr_0 = 1.1044e-04
Loss = 3.0130e-02, PNorm = 70.5606, GNorm = 0.5845, lr_0 = 1.1036e-04
Loss = 3.4090e-02, PNorm = 70.5616, GNorm = 0.4369, lr_0 = 1.1029e-04
Loss = 3.4062e-02, PNorm = 70.5631, GNorm = 0.4601, lr_0 = 1.1021e-04
Loss = 3.1041e-02, PNorm = 70.5649, GNorm = 0.6636, lr_0 = 1.1014e-04
Loss = 3.4097e-02, PNorm = 70.5658, GNorm = 0.4688, lr_0 = 1.1006e-04
Loss = 3.1654e-02, PNorm = 70.5667, GNorm = 0.3656, lr_0 = 1.0999e-04
Loss = 3.4309e-02, PNorm = 70.5678, GNorm = 0.4290, lr_0 = 1.0991e-04
Loss = 3.3160e-02, PNorm = 70.5687, GNorm = 0.4777, lr_0 = 1.0984e-04
Loss = 3.2475e-02, PNorm = 70.5707, GNorm = 0.6610, lr_0 = 1.0976e-04
Loss = 3.3249e-02, PNorm = 70.5730, GNorm = 0.5489, lr_0 = 1.0969e-04
Loss = 3.0263e-02, PNorm = 70.5736, GNorm = 0.3446, lr_0 = 1.0961e-04
Loss = 3.5624e-02, PNorm = 70.5747, GNorm = 0.4798, lr_0 = 1.0954e-04
Loss = 2.7667e-02, PNorm = 70.5770, GNorm = 0.4223, lr_0 = 1.0946e-04
Loss = 2.9789e-02, PNorm = 70.5789, GNorm = 0.4398, lr_0 = 1.0939e-04
Loss = 3.3237e-02, PNorm = 70.5808, GNorm = 0.4099, lr_0 = 1.0931e-04
Loss = 3.1930e-02, PNorm = 70.5825, GNorm = 0.4911, lr_0 = 1.0924e-04
Loss = 3.0887e-02, PNorm = 70.5846, GNorm = 0.5277, lr_0 = 1.0916e-04
Loss = 3.8442e-02, PNorm = 70.5861, GNorm = 0.6052, lr_0 = 1.0909e-04
Loss = 4.1239e-02, PNorm = 70.5878, GNorm = 0.4578, lr_0 = 1.0901e-04
Loss = 3.7194e-02, PNorm = 70.5899, GNorm = 0.5182, lr_0 = 1.0894e-04
Loss = 3.6708e-02, PNorm = 70.5925, GNorm = 0.6053, lr_0 = 1.0886e-04
Loss = 4.1884e-02, PNorm = 70.5945, GNorm = 0.7280, lr_0 = 1.0879e-04
Loss = 3.6368e-02, PNorm = 70.5961, GNorm = 0.5018, lr_0 = 1.0871e-04
Loss = 2.6869e-02, PNorm = 70.5983, GNorm = 0.4052, lr_0 = 1.0864e-04
Loss = 3.2055e-02, PNorm = 70.5990, GNorm = 0.6717, lr_0 = 1.0856e-04
Validation mae = 0.389304
Epoch 29
Loss = 2.6750e-02, PNorm = 70.6006, GNorm = 0.4902, lr_0 = 1.0849e-04
Loss = 3.1780e-02, PNorm = 70.6024, GNorm = 0.4629, lr_0 = 1.0841e-04
Loss = 2.4970e-02, PNorm = 70.6033, GNorm = 0.4464, lr_0 = 1.0834e-04
Loss = 3.0373e-02, PNorm = 70.6043, GNorm = 0.4359, lr_0 = 1.0827e-04
Loss = 2.8902e-02, PNorm = 70.6063, GNorm = 0.5206, lr_0 = 1.0819e-04
Loss = 2.7686e-02, PNorm = 70.6075, GNorm = 0.5205, lr_0 = 1.0812e-04
Loss = 2.7872e-02, PNorm = 70.6085, GNorm = 0.4638, lr_0 = 1.0804e-04
Loss = 2.9945e-02, PNorm = 70.6093, GNorm = 0.5518, lr_0 = 1.0797e-04
Loss = 2.7603e-02, PNorm = 70.6093, GNorm = 0.4542, lr_0 = 1.0790e-04
Loss = 2.6657e-02, PNorm = 70.6109, GNorm = 0.5406, lr_0 = 1.0782e-04
Loss = 2.9627e-02, PNorm = 70.6129, GNorm = 0.4209, lr_0 = 1.0775e-04
Loss = 2.8565e-02, PNorm = 70.6143, GNorm = 0.5188, lr_0 = 1.0767e-04
Loss = 3.1836e-02, PNorm = 70.6160, GNorm = 0.4940, lr_0 = 1.0760e-04
Loss = 2.5667e-02, PNorm = 70.6178, GNorm = 0.3557, lr_0 = 1.0753e-04
Loss = 2.7874e-02, PNorm = 70.6190, GNorm = 0.4062, lr_0 = 1.0745e-04
Loss = 3.0657e-02, PNorm = 70.6196, GNorm = 0.4264, lr_0 = 1.0738e-04
Loss = 2.9129e-02, PNorm = 70.6199, GNorm = 0.4245, lr_0 = 1.0731e-04
Loss = 2.9944e-02, PNorm = 70.6213, GNorm = 0.3648, lr_0 = 1.0723e-04
Loss = 2.9039e-02, PNorm = 70.6232, GNorm = 0.6029, lr_0 = 1.0716e-04
Loss = 2.6711e-02, PNorm = 70.6251, GNorm = 0.4097, lr_0 = 1.0709e-04
Loss = 2.7303e-02, PNorm = 70.6265, GNorm = 0.4999, lr_0 = 1.0701e-04
Loss = 3.4119e-02, PNorm = 70.6276, GNorm = 0.4976, lr_0 = 1.0694e-04
Loss = 3.1900e-02, PNorm = 70.6280, GNorm = 0.6210, lr_0 = 1.0687e-04
Loss = 2.5736e-02, PNorm = 70.6289, GNorm = 0.3265, lr_0 = 1.0679e-04
Loss = 2.9547e-02, PNorm = 70.6303, GNorm = 0.5186, lr_0 = 1.0672e-04
Loss = 2.6036e-02, PNorm = 70.6315, GNorm = 0.3380, lr_0 = 1.0665e-04
Loss = 3.4986e-02, PNorm = 70.6333, GNorm = 0.4073, lr_0 = 1.0657e-04
Loss = 3.3375e-02, PNorm = 70.6349, GNorm = 0.5250, lr_0 = 1.0650e-04
Loss = 3.2418e-02, PNorm = 70.6370, GNorm = 0.5738, lr_0 = 1.0643e-04
Loss = 3.0063e-02, PNorm = 70.6389, GNorm = 0.7333, lr_0 = 1.0635e-04
Loss = 3.0580e-02, PNorm = 70.6404, GNorm = 0.3841, lr_0 = 1.0628e-04
Loss = 2.3496e-02, PNorm = 70.6425, GNorm = 0.3333, lr_0 = 1.0621e-04
Loss = 2.9831e-02, PNorm = 70.6443, GNorm = 0.4446, lr_0 = 1.0614e-04
Loss = 3.3545e-02, PNorm = 70.6458, GNorm = 0.5734, lr_0 = 1.0606e-04
Loss = 3.0088e-02, PNorm = 70.6468, GNorm = 0.5963, lr_0 = 1.0599e-04
Loss = 3.3957e-02, PNorm = 70.6481, GNorm = 0.4597, lr_0 = 1.0592e-04
Loss = 2.8644e-02, PNorm = 70.6499, GNorm = 0.5463, lr_0 = 1.0585e-04
Loss = 2.9082e-02, PNorm = 70.6509, GNorm = 0.3830, lr_0 = 1.0577e-04
Loss = 2.8734e-02, PNorm = 70.6516, GNorm = 0.5263, lr_0 = 1.0570e-04
Loss = 3.0657e-02, PNorm = 70.6530, GNorm = 0.7944, lr_0 = 1.0563e-04
Loss = 3.5162e-02, PNorm = 70.6546, GNorm = 0.6588, lr_0 = 1.0556e-04
Loss = 2.8051e-02, PNorm = 70.6562, GNorm = 0.5102, lr_0 = 1.0548e-04
Loss = 2.7775e-02, PNorm = 70.6581, GNorm = 0.3769, lr_0 = 1.0541e-04
Loss = 2.8742e-02, PNorm = 70.6597, GNorm = 0.3188, lr_0 = 1.0534e-04
Loss = 3.2205e-02, PNorm = 70.6608, GNorm = 0.4330, lr_0 = 1.0527e-04
Loss = 3.2132e-02, PNorm = 70.6631, GNorm = 0.4060, lr_0 = 1.0519e-04
Loss = 2.9589e-02, PNorm = 70.6653, GNorm = 0.3673, lr_0 = 1.0512e-04
Loss = 2.8061e-02, PNorm = 70.6667, GNorm = 0.3867, lr_0 = 1.0505e-04
Loss = 2.7291e-02, PNorm = 70.6678, GNorm = 0.4313, lr_0 = 1.0498e-04
Loss = 2.8558e-02, PNorm = 70.6688, GNorm = 0.4053, lr_0 = 1.0491e-04
Loss = 2.8193e-02, PNorm = 70.6710, GNorm = 0.4852, lr_0 = 1.0483e-04
Loss = 2.9909e-02, PNorm = 70.6739, GNorm = 0.5577, lr_0 = 1.0476e-04
Loss = 3.3726e-02, PNorm = 70.6753, GNorm = 0.6390, lr_0 = 1.0469e-04
Loss = 2.8181e-02, PNorm = 70.6768, GNorm = 0.4957, lr_0 = 1.0462e-04
Loss = 3.1767e-02, PNorm = 70.6790, GNorm = 0.3980, lr_0 = 1.0455e-04
Loss = 2.9635e-02, PNorm = 70.6808, GNorm = 0.5921, lr_0 = 1.0448e-04
Loss = 2.4953e-02, PNorm = 70.6818, GNorm = 0.4260, lr_0 = 1.0440e-04
Loss = 2.9236e-02, PNorm = 70.6834, GNorm = 0.4394, lr_0 = 1.0433e-04
Loss = 3.1577e-02, PNorm = 70.6849, GNorm = 0.4627, lr_0 = 1.0426e-04
Loss = 3.2706e-02, PNorm = 70.6865, GNorm = 0.4620, lr_0 = 1.0419e-04
Loss = 2.8514e-02, PNorm = 70.6880, GNorm = 0.4991, lr_0 = 1.0412e-04
Loss = 2.9301e-02, PNorm = 70.6899, GNorm = 0.4276, lr_0 = 1.0405e-04
Loss = 3.2945e-02, PNorm = 70.6914, GNorm = 0.3685, lr_0 = 1.0398e-04
Loss = 2.9753e-02, PNorm = 70.6931, GNorm = 0.3949, lr_0 = 1.0391e-04
Loss = 3.2088e-02, PNorm = 70.6947, GNorm = 0.4337, lr_0 = 1.0383e-04
Loss = 2.7251e-02, PNorm = 70.6955, GNorm = 0.3472, lr_0 = 1.0376e-04
Loss = 3.2464e-02, PNorm = 70.6962, GNorm = 0.3547, lr_0 = 1.0369e-04
Loss = 3.9253e-02, PNorm = 70.6974, GNorm = 0.4390, lr_0 = 1.0362e-04
Loss = 3.3756e-02, PNorm = 70.6993, GNorm = 0.7053, lr_0 = 1.0355e-04
Loss = 3.2127e-02, PNorm = 70.7007, GNorm = 0.5647, lr_0 = 1.0348e-04
Loss = 2.9697e-02, PNorm = 70.7018, GNorm = 0.3328, lr_0 = 1.0341e-04
Loss = 3.2278e-02, PNorm = 70.7025, GNorm = 0.5114, lr_0 = 1.0334e-04
Loss = 3.5802e-02, PNorm = 70.7042, GNorm = 0.8331, lr_0 = 1.0327e-04
Loss = 3.0478e-02, PNorm = 70.7055, GNorm = 0.3272, lr_0 = 1.0320e-04
Loss = 2.8709e-02, PNorm = 70.7059, GNorm = 0.4828, lr_0 = 1.0312e-04
Loss = 2.6137e-02, PNorm = 70.7068, GNorm = 0.4724, lr_0 = 1.0305e-04
Loss = 3.0504e-02, PNorm = 70.7080, GNorm = 0.4436, lr_0 = 1.0298e-04
Loss = 2.8221e-02, PNorm = 70.7090, GNorm = 0.4312, lr_0 = 1.0291e-04
Loss = 2.4884e-02, PNorm = 70.7093, GNorm = 0.4970, lr_0 = 1.0284e-04
Loss = 3.4648e-02, PNorm = 70.7099, GNorm = 0.3518, lr_0 = 1.0277e-04
Loss = 2.9611e-02, PNorm = 70.7109, GNorm = 0.6044, lr_0 = 1.0270e-04
Loss = 3.0757e-02, PNorm = 70.7129, GNorm = 0.6066, lr_0 = 1.0263e-04
Loss = 2.6282e-02, PNorm = 70.7151, GNorm = 0.4485, lr_0 = 1.0256e-04
Loss = 2.4380e-02, PNorm = 70.7163, GNorm = 0.3523, lr_0 = 1.0249e-04
Loss = 2.9115e-02, PNorm = 70.7172, GNorm = 0.6596, lr_0 = 1.0242e-04
Loss = 2.9397e-02, PNorm = 70.7178, GNorm = 0.5807, lr_0 = 1.0235e-04
Loss = 3.2195e-02, PNorm = 70.7195, GNorm = 0.6804, lr_0 = 1.0228e-04
Loss = 2.9387e-02, PNorm = 70.7213, GNorm = 0.4031, lr_0 = 1.0221e-04
Loss = 3.4125e-02, PNorm = 70.7225, GNorm = 0.4339, lr_0 = 1.0214e-04
Loss = 3.3535e-02, PNorm = 70.7247, GNorm = 0.5946, lr_0 = 1.0207e-04
Loss = 3.5094e-02, PNorm = 70.7262, GNorm = 0.5032, lr_0 = 1.0200e-04
Loss = 3.2234e-02, PNorm = 70.7277, GNorm = 0.3881, lr_0 = 1.0193e-04
Loss = 3.3711e-02, PNorm = 70.7308, GNorm = 0.4616, lr_0 = 1.0186e-04
Loss = 3.3642e-02, PNorm = 70.7326, GNorm = 0.5248, lr_0 = 1.0179e-04
Loss = 3.2184e-02, PNorm = 70.7352, GNorm = 0.4063, lr_0 = 1.0172e-04
Loss = 3.1001e-02, PNorm = 70.7371, GNorm = 0.4673, lr_0 = 1.0165e-04
Loss = 2.8789e-02, PNorm = 70.7383, GNorm = 0.4288, lr_0 = 1.0158e-04
Loss = 3.4118e-02, PNorm = 70.7376, GNorm = 0.6784, lr_0 = 1.0151e-04
Loss = 2.7434e-02, PNorm = 70.7384, GNorm = 0.5225, lr_0 = 1.0144e-04
Loss = 2.7623e-02, PNorm = 70.7389, GNorm = 0.4491, lr_0 = 1.0137e-04
Loss = 3.3276e-02, PNorm = 70.7402, GNorm = 0.5795, lr_0 = 1.0130e-04
Loss = 3.0432e-02, PNorm = 70.7411, GNorm = 0.4402, lr_0 = 1.0123e-04
Loss = 3.2709e-02, PNorm = 70.7424, GNorm = 0.5432, lr_0 = 1.0116e-04
Loss = 3.0009e-02, PNorm = 70.7430, GNorm = 0.5251, lr_0 = 1.0110e-04
Loss = 2.9531e-02, PNorm = 70.7444, GNorm = 0.5925, lr_0 = 1.0103e-04
Loss = 3.5543e-02, PNorm = 70.7466, GNorm = 0.5413, lr_0 = 1.0096e-04
Loss = 4.1478e-02, PNorm = 70.7485, GNorm = 0.4894, lr_0 = 1.0089e-04
Loss = 3.0594e-02, PNorm = 70.7503, GNorm = 0.4574, lr_0 = 1.0082e-04
Loss = 3.1791e-02, PNorm = 70.7517, GNorm = 0.4320, lr_0 = 1.0075e-04
Loss = 3.1194e-02, PNorm = 70.7532, GNorm = 0.3431, lr_0 = 1.0068e-04
Loss = 3.2183e-02, PNorm = 70.7543, GNorm = 0.6760, lr_0 = 1.0061e-04
Loss = 3.2547e-02, PNorm = 70.7544, GNorm = 0.5259, lr_0 = 1.0054e-04
Loss = 3.1217e-02, PNorm = 70.7546, GNorm = 0.4450, lr_0 = 1.0047e-04
Loss = 3.2356e-02, PNorm = 70.7559, GNorm = 0.4442, lr_0 = 1.0041e-04
Loss = 3.4728e-02, PNorm = 70.7570, GNorm = 0.6207, lr_0 = 1.0034e-04
Loss = 3.6655e-02, PNorm = 70.7580, GNorm = 0.5841, lr_0 = 1.0027e-04
Loss = 2.7609e-02, PNorm = 70.7589, GNorm = 0.4460, lr_0 = 1.0020e-04
Loss = 3.3680e-02, PNorm = 70.7599, GNorm = 0.4420, lr_0 = 1.0013e-04
Loss = 3.7224e-02, PNorm = 70.7612, GNorm = 0.5997, lr_0 = 1.0006e-04
Loss = 3.0734e-02, PNorm = 70.7620, GNorm = 0.3447, lr_0 = 1.0000e-04
Validation mae = 0.388259
Model 0 best validation mae = 0.382914 on epoch 17
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.377298
Ensemble test mae = 0.377298
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 9.5743e-01, PNorm = 38.3715, GNorm = 1.2444, lr_0 = 1.0413e-04
Loss = 8.0330e-01, PNorm = 38.3745, GNorm = 2.0465, lr_0 = 1.0788e-04
Loss = 7.6325e-01, PNorm = 38.3778, GNorm = 3.0543, lr_0 = 1.1163e-04
Loss = 7.1388e-01, PNorm = 38.3804, GNorm = 2.8359, lr_0 = 1.1537e-04
Loss = 6.7878e-01, PNorm = 38.3840, GNorm = 1.6791, lr_0 = 1.1913e-04
Loss = 7.4434e-01, PNorm = 38.3887, GNorm = 6.0331, lr_0 = 1.2287e-04
Loss = 7.7591e-01, PNorm = 38.3929, GNorm = 3.8788, lr_0 = 1.2663e-04
Loss = 6.0442e-01, PNorm = 38.3978, GNorm = 3.2541, lr_0 = 1.3038e-04
Loss = 5.1411e-01, PNorm = 38.4031, GNorm = 1.7162, lr_0 = 1.3413e-04
Loss = 5.1849e-01, PNorm = 38.4091, GNorm = 19.4839, lr_0 = 1.3788e-04
Loss = 6.2802e-01, PNorm = 38.4134, GNorm = 10.4082, lr_0 = 1.4163e-04
Loss = 6.4118e-01, PNorm = 38.4180, GNorm = 1.6107, lr_0 = 1.4537e-04
Loss = 5.1422e-01, PNorm = 38.4235, GNorm = 5.6784, lr_0 = 1.4913e-04
Loss = 4.8415e-01, PNorm = 38.4295, GNorm = 2.6749, lr_0 = 1.5288e-04
Loss = 4.9577e-01, PNorm = 38.4360, GNorm = 3.7966, lr_0 = 1.5662e-04
Loss = 4.3792e-01, PNorm = 38.4428, GNorm = 3.4100, lr_0 = 1.6038e-04
Loss = 5.7288e-01, PNorm = 38.4484, GNorm = 5.4633, lr_0 = 1.6412e-04
Loss = 4.2302e-01, PNorm = 38.4552, GNorm = 5.7580, lr_0 = 1.6788e-04
Loss = 4.2991e-01, PNorm = 38.4622, GNorm = 7.3291, lr_0 = 1.7163e-04
Loss = 4.3352e-01, PNorm = 38.4684, GNorm = 31.2127, lr_0 = 1.7538e-04
Loss = 4.5545e-01, PNorm = 38.4705, GNorm = 2.5004, lr_0 = 1.7913e-04
Loss = 5.7823e-01, PNorm = 38.4735, GNorm = 10.6517, lr_0 = 1.8288e-04
Loss = 6.0598e-01, PNorm = 38.4769, GNorm = 4.3404, lr_0 = 1.8662e-04
Loss = 5.0156e-01, PNorm = 38.4817, GNorm = 8.3915, lr_0 = 1.9038e-04
Loss = 4.9087e-01, PNorm = 38.4864, GNorm = 6.5020, lr_0 = 1.9413e-04
Loss = 4.6005e-01, PNorm = 38.4930, GNorm = 3.4037, lr_0 = 1.9788e-04
Loss = 4.7685e-01, PNorm = 38.5006, GNorm = 5.1043, lr_0 = 2.0163e-04
Loss = 4.4828e-01, PNorm = 38.5088, GNorm = 7.8097, lr_0 = 2.0537e-04
Loss = 4.1725e-01, PNorm = 38.5173, GNorm = 7.0303, lr_0 = 2.0913e-04
Loss = 3.4719e-01, PNorm = 38.5233, GNorm = 8.7577, lr_0 = 2.1288e-04
Loss = 5.0515e-01, PNorm = 38.5277, GNorm = 1.8302, lr_0 = 2.1663e-04
Loss = 4.4069e-01, PNorm = 38.5326, GNorm = 15.1213, lr_0 = 2.2038e-04
Loss = 3.6613e-01, PNorm = 38.5399, GNorm = 1.8255, lr_0 = 2.2412e-04
Loss = 3.7880e-01, PNorm = 38.5475, GNorm = 7.9245, lr_0 = 2.2787e-04
Loss = 3.7073e-01, PNorm = 38.5539, GNorm = 1.7326, lr_0 = 2.3163e-04
Loss = 3.3116e-01, PNorm = 38.5617, GNorm = 4.1516, lr_0 = 2.3538e-04
Loss = 4.1221e-01, PNorm = 38.5682, GNorm = 9.4408, lr_0 = 2.3913e-04
Loss = 3.3587e-01, PNorm = 38.5747, GNorm = 2.1526, lr_0 = 2.4288e-04
Loss = 3.5075e-01, PNorm = 38.5814, GNorm = 11.4514, lr_0 = 2.4662e-04
Loss = 3.9720e-01, PNorm = 38.5861, GNorm = 15.8845, lr_0 = 2.5038e-04
Loss = 3.5331e-01, PNorm = 38.5918, GNorm = 7.4252, lr_0 = 2.5413e-04
Loss = 3.2978e-01, PNorm = 38.5988, GNorm = 4.2672, lr_0 = 2.5788e-04
Loss = 3.3632e-01, PNorm = 38.6063, GNorm = 3.1864, lr_0 = 2.6163e-04
Loss = 3.7477e-01, PNorm = 38.6122, GNorm = 2.7620, lr_0 = 2.6537e-04
Loss = 3.3482e-01, PNorm = 38.6205, GNorm = 19.2033, lr_0 = 2.6912e-04
Loss = 3.4277e-01, PNorm = 38.6272, GNorm = 10.2614, lr_0 = 2.7288e-04
Loss = 4.0330e-01, PNorm = 38.6324, GNorm = 10.5496, lr_0 = 2.7663e-04
Loss = 4.5418e-01, PNorm = 38.6386, GNorm = 8.6088, lr_0 = 2.8038e-04
Loss = 3.6266e-01, PNorm = 38.6500, GNorm = 7.4550, lr_0 = 2.8413e-04
Loss = 3.2731e-01, PNorm = 38.6609, GNorm = 6.0136, lr_0 = 2.8787e-04
Loss = 3.5047e-01, PNorm = 38.6662, GNorm = 1.5458, lr_0 = 2.9163e-04
Loss = 3.0149e-01, PNorm = 38.6714, GNorm = 1.0451, lr_0 = 2.9538e-04
Loss = 3.3146e-01, PNorm = 38.6753, GNorm = 23.3188, lr_0 = 2.9913e-04
Loss = 3.3257e-01, PNorm = 38.6804, GNorm = 2.1942, lr_0 = 3.0288e-04
Loss = 3.8670e-01, PNorm = 38.6854, GNorm = 2.2148, lr_0 = 3.0662e-04
Loss = 3.8655e-01, PNorm = 38.6934, GNorm = 9.0245, lr_0 = 3.1037e-04
Loss = 3.7151e-01, PNorm = 38.7022, GNorm = 2.8269, lr_0 = 3.1413e-04
Loss = 3.2923e-01, PNorm = 38.7126, GNorm = 10.8079, lr_0 = 3.1788e-04
Loss = 3.2181e-01, PNorm = 38.7208, GNorm = 1.5328, lr_0 = 3.2163e-04
Loss = 3.1523e-01, PNorm = 38.7252, GNorm = 3.4519, lr_0 = 3.2538e-04
Loss = 3.0474e-01, PNorm = 38.7302, GNorm = 11.7206, lr_0 = 3.2912e-04
Loss = 3.1443e-01, PNorm = 38.7370, GNorm = 9.5163, lr_0 = 3.3288e-04
Loss = 3.4449e-01, PNorm = 38.7417, GNorm = 18.3386, lr_0 = 3.3663e-04
Loss = 3.6840e-01, PNorm = 38.7451, GNorm = 1.1899, lr_0 = 3.4038e-04
Loss = 2.9221e-01, PNorm = 38.7518, GNorm = 4.4195, lr_0 = 3.4413e-04
Loss = 2.9999e-01, PNorm = 38.7600, GNorm = 2.3650, lr_0 = 3.4787e-04
Loss = 2.9539e-01, PNorm = 38.7673, GNorm = 2.6197, lr_0 = 3.5162e-04
Loss = 3.0752e-01, PNorm = 38.7756, GNorm = 3.3379, lr_0 = 3.5538e-04
Loss = 3.3080e-01, PNorm = 38.7838, GNorm = 20.0269, lr_0 = 3.5913e-04
Loss = 2.9608e-01, PNorm = 38.7918, GNorm = 4.7222, lr_0 = 3.6288e-04
Loss = 2.9335e-01, PNorm = 38.8048, GNorm = 11.2443, lr_0 = 3.6662e-04
Loss = 3.0253e-01, PNorm = 38.8147, GNorm = 11.1119, lr_0 = 3.7037e-04
Loss = 2.8939e-01, PNorm = 38.8231, GNorm = 2.1604, lr_0 = 3.7413e-04
Loss = 3.3982e-01, PNorm = 38.8274, GNorm = 15.5803, lr_0 = 3.7788e-04
Loss = 3.2176e-01, PNorm = 38.8344, GNorm = 13.1292, lr_0 = 3.8163e-04
Loss = 3.6086e-01, PNorm = 38.8423, GNorm = 7.6263, lr_0 = 3.8537e-04
Loss = 3.3390e-01, PNorm = 38.8522, GNorm = 7.0049, lr_0 = 3.8912e-04
Loss = 3.2601e-01, PNorm = 38.8615, GNorm = 4.5057, lr_0 = 3.9287e-04
Loss = 3.0065e-01, PNorm = 38.8719, GNorm = 6.6826, lr_0 = 3.9663e-04
Loss = 3.7899e-01, PNorm = 38.8774, GNorm = 15.8931, lr_0 = 4.0038e-04
Loss = 2.8254e-01, PNorm = 38.8826, GNorm = 3.4708, lr_0 = 4.0413e-04
Loss = 2.8716e-01, PNorm = 38.8916, GNorm = 5.7620, lr_0 = 4.0787e-04
Loss = 3.3850e-01, PNorm = 38.9023, GNorm = 15.0742, lr_0 = 4.1162e-04
Loss = 3.4309e-01, PNorm = 38.9131, GNorm = 4.2078, lr_0 = 4.1537e-04
Loss = 3.0315e-01, PNorm = 38.9250, GNorm = 2.4958, lr_0 = 4.1913e-04
Loss = 2.7731e-01, PNorm = 38.9332, GNorm = 5.9838, lr_0 = 4.2288e-04
Loss = 3.1480e-01, PNorm = 38.9473, GNorm = 4.5287, lr_0 = 4.2662e-04
Loss = 2.6906e-01, PNorm = 38.9547, GNorm = 5.3408, lr_0 = 4.3037e-04
Loss = 3.4170e-01, PNorm = 38.9593, GNorm = 13.1906, lr_0 = 4.3412e-04
Loss = 3.0065e-01, PNorm = 38.9708, GNorm = 9.6369, lr_0 = 4.3788e-04
Loss = 2.9932e-01, PNorm = 38.9837, GNorm = 1.0307, lr_0 = 4.4163e-04
Loss = 3.1543e-01, PNorm = 38.9979, GNorm = 13.6742, lr_0 = 4.4538e-04
Loss = 3.8523e-01, PNorm = 39.0091, GNorm = 9.9124, lr_0 = 4.4912e-04
Loss = 3.5358e-01, PNorm = 39.0164, GNorm = 9.1015, lr_0 = 4.5287e-04
Loss = 3.9459e-01, PNorm = 39.0285, GNorm = 3.9145, lr_0 = 4.5662e-04
Loss = 2.9539e-01, PNorm = 39.0403, GNorm = 5.1712, lr_0 = 4.6038e-04
Loss = 3.0525e-01, PNorm = 39.0507, GNorm = 7.0044, lr_0 = 4.6413e-04
Loss = 2.5661e-01, PNorm = 39.0649, GNorm = 7.3647, lr_0 = 4.6787e-04
Loss = 2.9023e-01, PNorm = 39.0746, GNorm = 11.0632, lr_0 = 4.7162e-04
Loss = 3.0128e-01, PNorm = 39.0839, GNorm = 1.4192, lr_0 = 4.7537e-04
Loss = 2.7095e-01, PNorm = 39.0931, GNorm = 1.2715, lr_0 = 4.7913e-04
Loss = 2.5593e-01, PNorm = 39.1023, GNorm = 3.1025, lr_0 = 4.8288e-04
Loss = 2.4191e-01, PNorm = 39.1103, GNorm = 1.8555, lr_0 = 4.8663e-04
Loss = 3.1466e-01, PNorm = 39.1157, GNorm = 16.5896, lr_0 = 4.9038e-04
Loss = 3.6126e-01, PNorm = 39.1252, GNorm = 4.2394, lr_0 = 4.9412e-04
Loss = 3.3650e-01, PNorm = 39.1388, GNorm = 2.3326, lr_0 = 4.9788e-04
Loss = 3.0138e-01, PNorm = 39.1515, GNorm = 4.1721, lr_0 = 5.0163e-04
Loss = 2.6597e-01, PNorm = 39.1627, GNorm = 2.1357, lr_0 = 5.0538e-04
Loss = 2.6814e-01, PNorm = 39.1718, GNorm = 5.1677, lr_0 = 5.0913e-04
Loss = 2.5550e-01, PNorm = 39.1797, GNorm = 1.3056, lr_0 = 5.1287e-04
Loss = 2.8321e-01, PNorm = 39.1953, GNorm = 9.2013, lr_0 = 5.1663e-04
Loss = 2.5809e-01, PNorm = 39.2090, GNorm = 3.9100, lr_0 = 5.2038e-04
Loss = 2.9599e-01, PNorm = 39.2186, GNorm = 4.1468, lr_0 = 5.2413e-04
Loss = 3.3036e-01, PNorm = 39.2347, GNorm = 7.1237, lr_0 = 5.2788e-04
Loss = 2.7181e-01, PNorm = 39.2511, GNorm = 5.7135, lr_0 = 5.3162e-04
Loss = 2.6360e-01, PNorm = 39.2642, GNorm = 3.5516, lr_0 = 5.3538e-04
Loss = 2.4181e-01, PNorm = 39.2802, GNorm = 8.5589, lr_0 = 5.3912e-04
Loss = 2.6886e-01, PNorm = 39.2912, GNorm = 1.6601, lr_0 = 5.4288e-04
Loss = 3.2392e-01, PNorm = 39.2965, GNorm = 10.1970, lr_0 = 5.4663e-04
Loss = 3.1338e-01, PNorm = 39.3086, GNorm = 10.3835, lr_0 = 5.5038e-04
Validation mae = 0.596148
Epoch 1
Loss = 3.0770e-01, PNorm = 39.3261, GNorm = 11.0788, lr_0 = 5.5413e-04
Loss = 2.8695e-01, PNorm = 39.3431, GNorm = 7.9641, lr_0 = 5.5787e-04
Loss = 2.7893e-01, PNorm = 39.3598, GNorm = 3.6698, lr_0 = 5.6163e-04
Loss = 2.9846e-01, PNorm = 39.3709, GNorm = 12.3314, lr_0 = 5.6538e-04
Loss = 2.8078e-01, PNorm = 39.3877, GNorm = 8.2050, lr_0 = 5.6913e-04
Loss = 2.5096e-01, PNorm = 39.4086, GNorm = 1.9575, lr_0 = 5.7288e-04
Loss = 2.3857e-01, PNorm = 39.4265, GNorm = 3.2852, lr_0 = 5.7662e-04
Loss = 2.3489e-01, PNorm = 39.4413, GNorm = 3.6806, lr_0 = 5.8038e-04
Loss = 2.6118e-01, PNorm = 39.4540, GNorm = 7.4148, lr_0 = 5.8413e-04
Loss = 3.0660e-01, PNorm = 39.4661, GNorm = 3.8056, lr_0 = 5.8788e-04
Loss = 3.3247e-01, PNorm = 39.4768, GNorm = 6.3146, lr_0 = 5.9163e-04
Loss = 2.9400e-01, PNorm = 39.4947, GNorm = 3.0506, lr_0 = 5.9538e-04
Loss = 2.5222e-01, PNorm = 39.5173, GNorm = 2.8923, lr_0 = 5.9913e-04
Loss = 2.2292e-01, PNorm = 39.5336, GNorm = 0.8296, lr_0 = 6.0288e-04
Loss = 2.3513e-01, PNorm = 39.5486, GNorm = 4.3298, lr_0 = 6.0663e-04
Loss = 2.7735e-01, PNorm = 39.5686, GNorm = 4.2991, lr_0 = 6.1038e-04
Loss = 2.4548e-01, PNorm = 39.5903, GNorm = 3.3878, lr_0 = 6.1413e-04
Loss = 2.7021e-01, PNorm = 39.6054, GNorm = 10.2232, lr_0 = 6.1788e-04
Loss = 3.0020e-01, PNorm = 39.6199, GNorm = 5.5189, lr_0 = 6.2163e-04
Loss = 2.4795e-01, PNorm = 39.6419, GNorm = 4.3058, lr_0 = 6.2538e-04
Loss = 2.1794e-01, PNorm = 39.6588, GNorm = 1.8387, lr_0 = 6.2913e-04
Loss = 2.1859e-01, PNorm = 39.6763, GNorm = 1.1138, lr_0 = 6.3288e-04
Loss = 2.2367e-01, PNorm = 39.6914, GNorm = 1.8602, lr_0 = 6.3663e-04
Loss = 2.4236e-01, PNorm = 39.7057, GNorm = 2.3535, lr_0 = 6.4038e-04
Loss = 2.1921e-01, PNorm = 39.7168, GNorm = 2.9317, lr_0 = 6.4413e-04
Loss = 2.7847e-01, PNorm = 39.7237, GNorm = 7.4526, lr_0 = 6.4788e-04
Loss = 3.8878e-01, PNorm = 39.7378, GNorm = 8.0597, lr_0 = 6.5163e-04
Loss = 3.1272e-01, PNorm = 39.7585, GNorm = 4.6836, lr_0 = 6.5538e-04
Loss = 2.6600e-01, PNorm = 39.7832, GNorm = 2.1654, lr_0 = 6.5913e-04
Loss = 2.6606e-01, PNorm = 39.8081, GNorm = 1.8988, lr_0 = 6.6288e-04
Loss = 2.3558e-01, PNorm = 39.8265, GNorm = 7.1166, lr_0 = 6.6663e-04
Loss = 2.4107e-01, PNorm = 39.8403, GNorm = 2.0372, lr_0 = 6.7038e-04
Loss = 2.8292e-01, PNorm = 39.8566, GNorm = 3.9824, lr_0 = 6.7413e-04
Loss = 2.6132e-01, PNorm = 39.8751, GNorm = 3.4636, lr_0 = 6.7788e-04
Loss = 2.3979e-01, PNorm = 39.8931, GNorm = 2.0961, lr_0 = 6.8163e-04
Loss = 2.8662e-01, PNorm = 39.9094, GNorm = 1.5842, lr_0 = 6.8538e-04
Loss = 2.6083e-01, PNorm = 39.9291, GNorm = 1.6835, lr_0 = 6.8913e-04
Loss = 2.8913e-01, PNorm = 39.9481, GNorm = 1.2612, lr_0 = 6.9288e-04
Loss = 2.6421e-01, PNorm = 39.9730, GNorm = 1.9108, lr_0 = 6.9663e-04
Loss = 1.9664e-01, PNorm = 39.9888, GNorm = 1.0232, lr_0 = 7.0038e-04
Loss = 2.2115e-01, PNorm = 40.0028, GNorm = 3.8124, lr_0 = 7.0413e-04
Loss = 2.2535e-01, PNorm = 40.0207, GNorm = 6.0696, lr_0 = 7.0788e-04
Loss = 2.5776e-01, PNorm = 40.0373, GNorm = 2.8280, lr_0 = 7.1163e-04
Loss = 2.9351e-01, PNorm = 40.0529, GNorm = 1.7160, lr_0 = 7.1538e-04
Loss = 2.5669e-01, PNorm = 40.0713, GNorm = 2.4176, lr_0 = 7.1913e-04
Loss = 2.4149e-01, PNorm = 40.0960, GNorm = 1.2626, lr_0 = 7.2288e-04
Loss = 2.4905e-01, PNorm = 40.1192, GNorm = 12.7725, lr_0 = 7.2663e-04
Loss = 3.4303e-01, PNorm = 40.1435, GNorm = 3.0250, lr_0 = 7.3038e-04
Loss = 2.8749e-01, PNorm = 40.1672, GNorm = 6.6197, lr_0 = 7.3413e-04
Loss = 2.9255e-01, PNorm = 40.1942, GNorm = 1.9321, lr_0 = 7.3788e-04
Loss = 2.1375e-01, PNorm = 40.2220, GNorm = 0.8957, lr_0 = 7.4163e-04
Loss = 2.5598e-01, PNorm = 40.2422, GNorm = 4.2332, lr_0 = 7.4538e-04
Loss = 2.0898e-01, PNorm = 40.2611, GNorm = 1.5604, lr_0 = 7.4913e-04
Loss = 2.4181e-01, PNorm = 40.2766, GNorm = 1.5415, lr_0 = 7.5288e-04
Loss = 2.3293e-01, PNorm = 40.2903, GNorm = 1.5319, lr_0 = 7.5663e-04
Loss = 2.0574e-01, PNorm = 40.3121, GNorm = 1.4581, lr_0 = 7.6038e-04
Loss = 2.3115e-01, PNorm = 40.3352, GNorm = 3.5025, lr_0 = 7.6413e-04
Loss = 2.3662e-01, PNorm = 40.3473, GNorm = 1.8110, lr_0 = 7.6788e-04
Loss = 2.8181e-01, PNorm = 40.3658, GNorm = 5.9402, lr_0 = 7.7163e-04
Loss = 2.3701e-01, PNorm = 40.3885, GNorm = 2.6512, lr_0 = 7.7538e-04
Loss = 2.3244e-01, PNorm = 40.4076, GNorm = 1.2360, lr_0 = 7.7913e-04
Loss = 2.3503e-01, PNorm = 40.4337, GNorm = 2.7973, lr_0 = 7.8288e-04
Loss = 2.4977e-01, PNorm = 40.4485, GNorm = 1.4723, lr_0 = 7.8663e-04
Loss = 2.5558e-01, PNorm = 40.4763, GNorm = 1.7560, lr_0 = 7.9038e-04
Loss = 2.6538e-01, PNorm = 40.5037, GNorm = 4.1907, lr_0 = 7.9413e-04
Loss = 2.5046e-01, PNorm = 40.5175, GNorm = 2.7252, lr_0 = 7.9788e-04
Loss = 2.4991e-01, PNorm = 40.5405, GNorm = 1.7247, lr_0 = 8.0163e-04
Loss = 2.4713e-01, PNorm = 40.5666, GNorm = 4.1696, lr_0 = 8.0538e-04
Loss = 2.2651e-01, PNorm = 40.5905, GNorm = 1.3115, lr_0 = 8.0913e-04
Loss = 2.1298e-01, PNorm = 40.6086, GNorm = 1.1807, lr_0 = 8.1288e-04
Loss = 2.1465e-01, PNorm = 40.6304, GNorm = 3.8928, lr_0 = 8.1663e-04
Loss = 2.1754e-01, PNorm = 40.6506, GNorm = 1.3500, lr_0 = 8.2038e-04
Loss = 1.8546e-01, PNorm = 40.6703, GNorm = 1.7385, lr_0 = 8.2413e-04
Loss = 2.1298e-01, PNorm = 40.6906, GNorm = 4.2640, lr_0 = 8.2788e-04
Loss = 2.3109e-01, PNorm = 40.7097, GNorm = 1.6505, lr_0 = 8.3163e-04
Loss = 2.3221e-01, PNorm = 40.7288, GNorm = 0.8414, lr_0 = 8.3538e-04
Loss = 2.2831e-01, PNorm = 40.7612, GNorm = 2.1516, lr_0 = 8.3913e-04
Loss = 2.4610e-01, PNorm = 40.7833, GNorm = 2.2211, lr_0 = 8.4288e-04
Loss = 2.7593e-01, PNorm = 40.8027, GNorm = 6.7494, lr_0 = 8.4663e-04
Loss = 2.6338e-01, PNorm = 40.8262, GNorm = 5.4468, lr_0 = 8.5038e-04
Loss = 2.5376e-01, PNorm = 40.8508, GNorm = 4.6250, lr_0 = 8.5413e-04
Loss = 2.6242e-01, PNorm = 40.8833, GNorm = 1.4087, lr_0 = 8.5788e-04
Loss = 2.6109e-01, PNorm = 40.9120, GNorm = 4.0584, lr_0 = 8.6163e-04
Loss = 2.4056e-01, PNorm = 40.9480, GNorm = 0.8993, lr_0 = 8.6538e-04
Loss = 2.7029e-01, PNorm = 40.9831, GNorm = 1.1340, lr_0 = 8.6913e-04
Loss = 2.4582e-01, PNorm = 41.0140, GNorm = 2.7936, lr_0 = 8.7288e-04
Loss = 2.2076e-01, PNorm = 41.0542, GNorm = 2.1977, lr_0 = 8.7663e-04
Loss = 2.0703e-01, PNorm = 41.0797, GNorm = 1.0704, lr_0 = 8.8038e-04
Loss = 2.2936e-01, PNorm = 41.0980, GNorm = 4.2811, lr_0 = 8.8413e-04
Loss = 2.3317e-01, PNorm = 41.1233, GNorm = 1.5836, lr_0 = 8.8788e-04
Loss = 2.4206e-01, PNorm = 41.1454, GNorm = 5.3065, lr_0 = 8.9163e-04
Loss = 2.1249e-01, PNorm = 41.1758, GNorm = 2.0524, lr_0 = 8.9538e-04
Loss = 2.4237e-01, PNorm = 41.1971, GNorm = 2.3658, lr_0 = 8.9913e-04
Loss = 2.7877e-01, PNorm = 41.2193, GNorm = 3.7111, lr_0 = 9.0288e-04
Loss = 2.5387e-01, PNorm = 41.2520, GNorm = 3.0243, lr_0 = 9.0663e-04
Loss = 2.0662e-01, PNorm = 41.2797, GNorm = 2.5477, lr_0 = 9.1038e-04
Loss = 2.5177e-01, PNorm = 41.3015, GNorm = 4.9716, lr_0 = 9.1413e-04
Loss = 2.0801e-01, PNorm = 41.3343, GNorm = 1.4557, lr_0 = 9.1788e-04
Loss = 2.5133e-01, PNorm = 41.3617, GNorm = 2.6600, lr_0 = 9.2163e-04
Loss = 2.9552e-01, PNorm = 41.3937, GNorm = 1.6831, lr_0 = 9.2538e-04
Loss = 2.4703e-01, PNorm = 41.4336, GNorm = 2.3747, lr_0 = 9.2913e-04
Loss = 2.1340e-01, PNorm = 41.4673, GNorm = 3.6554, lr_0 = 9.3288e-04
Loss = 2.3187e-01, PNorm = 41.4929, GNorm = 2.6808, lr_0 = 9.3663e-04
Loss = 2.3234e-01, PNorm = 41.5177, GNorm = 5.4458, lr_0 = 9.4038e-04
Loss = 2.3053e-01, PNorm = 41.5484, GNorm = 2.4233, lr_0 = 9.4413e-04
Loss = 2.0742e-01, PNorm = 41.5818, GNorm = 3.9937, lr_0 = 9.4788e-04
Loss = 2.4571e-01, PNorm = 41.5968, GNorm = 3.4952, lr_0 = 9.5163e-04
Loss = 2.2636e-01, PNorm = 41.6278, GNorm = 1.9768, lr_0 = 9.5538e-04
Loss = 2.4010e-01, PNorm = 41.6430, GNorm = 4.8452, lr_0 = 9.5913e-04
Loss = 2.5003e-01, PNorm = 41.6712, GNorm = 2.1024, lr_0 = 9.6288e-04
Loss = 2.0212e-01, PNorm = 41.7007, GNorm = 2.2294, lr_0 = 9.6663e-04
Loss = 2.0406e-01, PNorm = 41.7308, GNorm = 0.8485, lr_0 = 9.7038e-04
Loss = 1.9127e-01, PNorm = 41.7502, GNorm = 0.7080, lr_0 = 9.7413e-04
Loss = 2.1945e-01, PNorm = 41.7751, GNorm = 1.5991, lr_0 = 9.7788e-04
Loss = 2.4429e-01, PNorm = 41.8012, GNorm = 3.5383, lr_0 = 9.8163e-04
Loss = 2.2775e-01, PNorm = 41.8437, GNorm = 3.1351, lr_0 = 9.8537e-04
Loss = 1.8414e-01, PNorm = 41.8827, GNorm = 3.4979, lr_0 = 9.8912e-04
Loss = 2.1824e-01, PNorm = 41.9105, GNorm = 1.6910, lr_0 = 9.9288e-04
Loss = 2.6826e-01, PNorm = 41.9509, GNorm = 2.9612, lr_0 = 9.9663e-04
Loss = 2.3598e-01, PNorm = 41.9859, GNorm = 1.6086, lr_0 = 9.9993e-04
Validation mae = 0.509613
Epoch 2
Loss = 1.9731e-01, PNorm = 42.0185, GNorm = 1.1813, lr_0 = 9.9925e-04
Loss = 2.1642e-01, PNorm = 42.0544, GNorm = 2.6749, lr_0 = 9.9856e-04
Loss = 2.0519e-01, PNorm = 42.0882, GNorm = 3.2693, lr_0 = 9.9788e-04
Loss = 1.7193e-01, PNorm = 42.1176, GNorm = 1.5819, lr_0 = 9.9719e-04
Loss = 2.0408e-01, PNorm = 42.1468, GNorm = 2.7523, lr_0 = 9.9651e-04
Loss = 2.0311e-01, PNorm = 42.1726, GNorm = 0.8989, lr_0 = 9.9583e-04
Loss = 2.1353e-01, PNorm = 42.1861, GNorm = 1.3578, lr_0 = 9.9515e-04
Loss = 2.2331e-01, PNorm = 42.2135, GNorm = 2.4016, lr_0 = 9.9446e-04
Loss = 2.2695e-01, PNorm = 42.2455, GNorm = 1.3431, lr_0 = 9.9378e-04
Loss = 2.0217e-01, PNorm = 42.2756, GNorm = 1.9537, lr_0 = 9.9310e-04
Loss = 2.1849e-01, PNorm = 42.2989, GNorm = 1.1307, lr_0 = 9.9242e-04
Loss = 2.2327e-01, PNorm = 42.3247, GNorm = 6.0255, lr_0 = 9.9174e-04
Loss = 2.1941e-01, PNorm = 42.3484, GNorm = 3.9791, lr_0 = 9.9106e-04
Loss = 2.2807e-01, PNorm = 42.3733, GNorm = 3.1218, lr_0 = 9.9038e-04
Loss = 2.3062e-01, PNorm = 42.3971, GNorm = 2.9812, lr_0 = 9.8971e-04
Loss = 1.7519e-01, PNorm = 42.4306, GNorm = 1.8629, lr_0 = 9.8903e-04
Loss = 2.2761e-01, PNorm = 42.4546, GNorm = 1.3772, lr_0 = 9.8835e-04
Loss = 1.9383e-01, PNorm = 42.4824, GNorm = 0.9444, lr_0 = 9.8767e-04
Loss = 1.9262e-01, PNorm = 42.5092, GNorm = 2.0441, lr_0 = 9.8700e-04
Loss = 1.9206e-01, PNorm = 42.5371, GNorm = 2.5467, lr_0 = 9.8632e-04
Loss = 1.9290e-01, PNorm = 42.5557, GNorm = 4.5301, lr_0 = 9.8564e-04
Loss = 1.9574e-01, PNorm = 42.5714, GNorm = 3.9873, lr_0 = 9.8497e-04
Loss = 2.0626e-01, PNorm = 42.5950, GNorm = 3.3354, lr_0 = 9.8429e-04
Loss = 2.2927e-01, PNorm = 42.6197, GNorm = 6.3727, lr_0 = 9.8362e-04
Loss = 1.9826e-01, PNorm = 42.6511, GNorm = 2.0209, lr_0 = 9.8295e-04
Loss = 2.0354e-01, PNorm = 42.6894, GNorm = 5.7005, lr_0 = 9.8227e-04
Loss = 1.8491e-01, PNorm = 42.7190, GNorm = 1.1294, lr_0 = 9.8160e-04
Loss = 1.8674e-01, PNorm = 42.7394, GNorm = 0.7401, lr_0 = 9.8093e-04
Loss = 1.9955e-01, PNorm = 42.7568, GNorm = 3.1502, lr_0 = 9.8026e-04
Loss = 1.8858e-01, PNorm = 42.7813, GNorm = 1.4780, lr_0 = 9.7958e-04
Loss = 1.9460e-01, PNorm = 42.8072, GNorm = 1.3518, lr_0 = 9.7891e-04
Loss = 1.9176e-01, PNorm = 42.8356, GNorm = 3.6209, lr_0 = 9.7824e-04
Loss = 1.8927e-01, PNorm = 42.8623, GNorm = 1.0439, lr_0 = 9.7757e-04
Loss = 2.1191e-01, PNorm = 42.8985, GNorm = 4.2188, lr_0 = 9.7690e-04
Loss = 2.0199e-01, PNorm = 42.9353, GNorm = 2.2674, lr_0 = 9.7623e-04
Loss = 2.0124e-01, PNorm = 42.9641, GNorm = 2.2088, lr_0 = 9.7556e-04
Loss = 2.1421e-01, PNorm = 42.9994, GNorm = 3.5698, lr_0 = 9.7490e-04
Loss = 2.1701e-01, PNorm = 43.0323, GNorm = 1.7081, lr_0 = 9.7423e-04
Loss = 2.0023e-01, PNorm = 43.0604, GNorm = 1.7899, lr_0 = 9.7356e-04
Loss = 2.0362e-01, PNorm = 43.0876, GNorm = 0.6578, lr_0 = 9.7289e-04
Loss = 2.0530e-01, PNorm = 43.0962, GNorm = 2.3957, lr_0 = 9.7223e-04
Loss = 2.1740e-01, PNorm = 43.1139, GNorm = 2.6551, lr_0 = 9.7156e-04
Loss = 1.9733e-01, PNorm = 43.1469, GNorm = 1.3402, lr_0 = 9.7090e-04
Loss = 2.1052e-01, PNorm = 43.1802, GNorm = 1.8690, lr_0 = 9.7023e-04
Loss = 2.0965e-01, PNorm = 43.2055, GNorm = 1.8083, lr_0 = 9.6957e-04
Loss = 2.1505e-01, PNorm = 43.2277, GNorm = 4.1512, lr_0 = 9.6890e-04
Loss = 2.0132e-01, PNorm = 43.2506, GNorm = 1.1772, lr_0 = 9.6824e-04
Loss = 2.1062e-01, PNorm = 43.2758, GNorm = 0.8366, lr_0 = 9.6757e-04
Loss = 2.0763e-01, PNorm = 43.3078, GNorm = 3.8421, lr_0 = 9.6691e-04
Loss = 2.2843e-01, PNorm = 43.3305, GNorm = 1.0245, lr_0 = 9.6625e-04
Loss = 1.8915e-01, PNorm = 43.3526, GNorm = 1.6272, lr_0 = 9.6559e-04
Loss = 1.9662e-01, PNorm = 43.3827, GNorm = 0.7407, lr_0 = 9.6493e-04
Loss = 2.1545e-01, PNorm = 43.4072, GNorm = 3.6179, lr_0 = 9.6427e-04
Loss = 2.1911e-01, PNorm = 43.4392, GNorm = 2.0982, lr_0 = 9.6360e-04
Loss = 1.9618e-01, PNorm = 43.4645, GNorm = 0.8573, lr_0 = 9.6294e-04
Loss = 2.3668e-01, PNorm = 43.4872, GNorm = 1.5534, lr_0 = 9.6228e-04
Loss = 2.4601e-01, PNorm = 43.5168, GNorm = 3.1481, lr_0 = 9.6163e-04
Loss = 2.2115e-01, PNorm = 43.5589, GNorm = 3.1873, lr_0 = 9.6097e-04
Loss = 2.1052e-01, PNorm = 43.5895, GNorm = 3.8588, lr_0 = 9.6031e-04
Loss = 2.0334e-01, PNorm = 43.6076, GNorm = 4.1311, lr_0 = 9.5965e-04
Loss = 2.0719e-01, PNorm = 43.6276, GNorm = 3.6266, lr_0 = 9.5899e-04
Loss = 2.2888e-01, PNorm = 43.6468, GNorm = 0.8811, lr_0 = 9.5834e-04
Loss = 2.0393e-01, PNorm = 43.6725, GNorm = 0.9012, lr_0 = 9.5768e-04
Loss = 1.8688e-01, PNorm = 43.7042, GNorm = 1.2527, lr_0 = 9.5702e-04
Loss = 1.8961e-01, PNorm = 43.7427, GNorm = 1.1114, lr_0 = 9.5637e-04
Loss = 1.9115e-01, PNorm = 43.7589, GNorm = 1.2622, lr_0 = 9.5571e-04
Loss = 2.1685e-01, PNorm = 43.7849, GNorm = 1.3049, lr_0 = 9.5506e-04
Loss = 1.9854e-01, PNorm = 43.8139, GNorm = 1.5121, lr_0 = 9.5440e-04
Loss = 1.9142e-01, PNorm = 43.8444, GNorm = 1.2926, lr_0 = 9.5375e-04
Loss = 1.8990e-01, PNorm = 43.8771, GNorm = 0.7858, lr_0 = 9.5310e-04
Loss = 2.0484e-01, PNorm = 43.8930, GNorm = 2.1917, lr_0 = 9.5244e-04
Loss = 1.9253e-01, PNorm = 43.9101, GNorm = 2.5872, lr_0 = 9.5179e-04
Loss = 1.6500e-01, PNorm = 43.9288, GNorm = 0.9016, lr_0 = 9.5114e-04
Loss = 2.0212e-01, PNorm = 43.9528, GNorm = 1.1910, lr_0 = 9.5049e-04
Loss = 2.0296e-01, PNorm = 43.9858, GNorm = 1.7965, lr_0 = 9.4984e-04
Loss = 2.0778e-01, PNorm = 44.0127, GNorm = 2.6205, lr_0 = 9.4919e-04
Loss = 2.0033e-01, PNorm = 44.0345, GNorm = 3.9989, lr_0 = 9.4854e-04
Loss = 2.0986e-01, PNorm = 44.0750, GNorm = 4.2847, lr_0 = 9.4789e-04
Loss = 1.8897e-01, PNorm = 44.1138, GNorm = 2.0739, lr_0 = 9.4724e-04
Loss = 1.8588e-01, PNorm = 44.1421, GNorm = 3.1869, lr_0 = 9.4659e-04
Loss = 1.9856e-01, PNorm = 44.1646, GNorm = 2.0957, lr_0 = 9.4594e-04
Loss = 1.7359e-01, PNorm = 44.1853, GNorm = 2.3888, lr_0 = 9.4529e-04
Loss = 1.4886e-01, PNorm = 44.2110, GNorm = 1.6188, lr_0 = 9.4464e-04
Loss = 1.6404e-01, PNorm = 44.2307, GNorm = 2.0287, lr_0 = 9.4400e-04
Loss = 1.8835e-01, PNorm = 44.2527, GNorm = 2.6599, lr_0 = 9.4335e-04
Loss = 2.0605e-01, PNorm = 44.2713, GNorm = 1.0567, lr_0 = 9.4270e-04
Loss = 1.9058e-01, PNorm = 44.2931, GNorm = 3.3276, lr_0 = 9.4206e-04
Loss = 1.9078e-01, PNorm = 44.3168, GNorm = 1.0318, lr_0 = 9.4141e-04
Loss = 2.1897e-01, PNorm = 44.3358, GNorm = 2.2006, lr_0 = 9.4077e-04
Loss = 1.7804e-01, PNorm = 44.3641, GNorm = 1.0988, lr_0 = 9.4012e-04
Loss = 1.6105e-01, PNorm = 44.3890, GNorm = 1.0149, lr_0 = 9.3948e-04
Loss = 1.8958e-01, PNorm = 44.4082, GNorm = 1.8975, lr_0 = 9.3884e-04
Loss = 1.9165e-01, PNorm = 44.4282, GNorm = 0.7151, lr_0 = 9.3819e-04
Loss = 1.8623e-01, PNorm = 44.4447, GNorm = 0.9570, lr_0 = 9.3755e-04
Loss = 1.5939e-01, PNorm = 44.4695, GNorm = 1.2419, lr_0 = 9.3691e-04
Loss = 2.0114e-01, PNorm = 44.4898, GNorm = 4.4249, lr_0 = 9.3627e-04
Loss = 1.8854e-01, PNorm = 44.5211, GNorm = 1.6886, lr_0 = 9.3562e-04
Loss = 1.9229e-01, PNorm = 44.5506, GNorm = 1.3305, lr_0 = 9.3498e-04
Loss = 2.0155e-01, PNorm = 44.5855, GNorm = 0.8118, lr_0 = 9.3434e-04
Loss = 1.9670e-01, PNorm = 44.6166, GNorm = 1.0939, lr_0 = 9.3370e-04
Loss = 2.2657e-01, PNorm = 44.6468, GNorm = 2.0853, lr_0 = 9.3306e-04
Loss = 2.0713e-01, PNorm = 44.6821, GNorm = 0.6555, lr_0 = 9.3242e-04
Loss = 1.8472e-01, PNorm = 44.7102, GNorm = 3.0674, lr_0 = 9.3178e-04
Loss = 1.5540e-01, PNorm = 44.7335, GNorm = 1.0451, lr_0 = 9.3115e-04
Loss = 1.6338e-01, PNorm = 44.7551, GNorm = 1.5896, lr_0 = 9.3051e-04
Loss = 1.9466e-01, PNorm = 44.7747, GNorm = 0.8693, lr_0 = 9.2987e-04
Loss = 1.8140e-01, PNorm = 44.7942, GNorm = 3.7475, lr_0 = 9.2923e-04
Loss = 2.0533e-01, PNorm = 44.8217, GNorm = 2.3170, lr_0 = 9.2860e-04
Loss = 2.3428e-01, PNorm = 44.8567, GNorm = 3.1208, lr_0 = 9.2796e-04
Loss = 2.0852e-01, PNorm = 44.8890, GNorm = 2.4131, lr_0 = 9.2733e-04
Loss = 1.9224e-01, PNorm = 44.9128, GNorm = 2.7665, lr_0 = 9.2669e-04
Loss = 1.9049e-01, PNorm = 44.9347, GNorm = 0.8487, lr_0 = 9.2606e-04
Loss = 1.8595e-01, PNorm = 44.9551, GNorm = 1.2618, lr_0 = 9.2542e-04
Loss = 1.9003e-01, PNorm = 44.9804, GNorm = 2.7674, lr_0 = 9.2479e-04
Loss = 1.9111e-01, PNorm = 45.0017, GNorm = 1.7359, lr_0 = 9.2415e-04
Loss = 1.7556e-01, PNorm = 45.0289, GNorm = 2.1203, lr_0 = 9.2352e-04
Loss = 1.8350e-01, PNorm = 45.0456, GNorm = 0.9533, lr_0 = 9.2289e-04
Loss = 1.7345e-01, PNorm = 45.0586, GNorm = 0.6926, lr_0 = 9.2226e-04
Loss = 1.6305e-01, PNorm = 45.0803, GNorm = 1.2582, lr_0 = 9.2162e-04
Loss = 1.8792e-01, PNorm = 45.1001, GNorm = 2.9806, lr_0 = 9.2099e-04
Validation mae = 0.470389
Epoch 3
Loss = 1.7776e-01, PNorm = 45.1196, GNorm = 2.9196, lr_0 = 9.2036e-04
Loss = 1.5702e-01, PNorm = 45.1404, GNorm = 0.7906, lr_0 = 9.1973e-04
Loss = 1.8216e-01, PNorm = 45.1645, GNorm = 1.4889, lr_0 = 9.1910e-04
Loss = 1.4402e-01, PNorm = 45.1855, GNorm = 0.9166, lr_0 = 9.1847e-04
Loss = 1.8050e-01, PNorm = 45.2120, GNorm = 0.8687, lr_0 = 9.1784e-04
Loss = 1.6393e-01, PNorm = 45.2328, GNorm = 1.1302, lr_0 = 9.1721e-04
Loss = 2.0359e-01, PNorm = 45.2540, GNorm = 5.6679, lr_0 = 9.1658e-04
Loss = 1.7468e-01, PNorm = 45.2883, GNorm = 1.0174, lr_0 = 9.1596e-04
Loss = 1.7124e-01, PNorm = 45.3165, GNorm = 1.5513, lr_0 = 9.1533e-04
Loss = 1.8628e-01, PNorm = 45.3479, GNorm = 0.9632, lr_0 = 9.1470e-04
Loss = 1.7731e-01, PNorm = 45.3706, GNorm = 0.8380, lr_0 = 9.1408e-04
Loss = 2.0345e-01, PNorm = 45.3896, GNorm = 2.5734, lr_0 = 9.1345e-04
Loss = 1.6095e-01, PNorm = 45.4225, GNorm = 1.2102, lr_0 = 9.1282e-04
Loss = 1.6760e-01, PNorm = 45.4536, GNorm = 0.8480, lr_0 = 9.1220e-04
Loss = 1.4970e-01, PNorm = 45.4717, GNorm = 1.3012, lr_0 = 9.1157e-04
Loss = 1.8747e-01, PNorm = 45.4885, GNorm = 0.8902, lr_0 = 9.1095e-04
Loss = 1.6769e-01, PNorm = 45.5135, GNorm = 3.4295, lr_0 = 9.1032e-04
Loss = 1.8031e-01, PNorm = 45.5346, GNorm = 1.2375, lr_0 = 9.0970e-04
Loss = 1.7130e-01, PNorm = 45.5667, GNorm = 2.3422, lr_0 = 9.0908e-04
Loss = 1.6220e-01, PNorm = 45.5958, GNorm = 3.6997, lr_0 = 9.0846e-04
Loss = 1.6187e-01, PNorm = 45.6125, GNorm = 1.5409, lr_0 = 9.0783e-04
Loss = 1.8617e-01, PNorm = 45.6361, GNorm = 1.8441, lr_0 = 9.0721e-04
Loss = 1.6661e-01, PNorm = 45.6554, GNorm = 2.8292, lr_0 = 9.0659e-04
Loss = 1.9636e-01, PNorm = 45.6806, GNorm = 1.8091, lr_0 = 9.0597e-04
Loss = 2.2231e-01, PNorm = 45.7132, GNorm = 4.2555, lr_0 = 9.0535e-04
Loss = 2.2570e-01, PNorm = 45.7523, GNorm = 6.3482, lr_0 = 9.0473e-04
Loss = 2.2980e-01, PNorm = 45.7918, GNorm = 1.0791, lr_0 = 9.0411e-04
Loss = 1.9290e-01, PNorm = 45.8321, GNorm = 0.7828, lr_0 = 9.0349e-04
Loss = 1.6217e-01, PNorm = 45.8634, GNorm = 0.8426, lr_0 = 9.0287e-04
Loss = 1.8166e-01, PNorm = 45.8898, GNorm = 1.2753, lr_0 = 9.0225e-04
Loss = 1.7195e-01, PNorm = 45.9164, GNorm = 2.4877, lr_0 = 9.0163e-04
Loss = 1.4699e-01, PNorm = 45.9437, GNorm = 1.6032, lr_0 = 9.0102e-04
Loss = 1.6857e-01, PNorm = 45.9682, GNorm = 1.8082, lr_0 = 9.0040e-04
Loss = 1.8417e-01, PNorm = 45.9937, GNorm = 2.2641, lr_0 = 8.9978e-04
Loss = 1.8813e-01, PNorm = 46.0204, GNorm = 0.7568, lr_0 = 8.9916e-04
Loss = 1.4711e-01, PNorm = 46.0440, GNorm = 1.3392, lr_0 = 8.9855e-04
Loss = 1.5031e-01, PNorm = 46.0751, GNorm = 0.8225, lr_0 = 8.9793e-04
Loss = 1.5400e-01, PNorm = 46.0953, GNorm = 0.5973, lr_0 = 8.9732e-04
Loss = 1.6357e-01, PNorm = 46.1089, GNorm = 0.7467, lr_0 = 8.9670e-04
Loss = 1.7523e-01, PNorm = 46.1260, GNorm = 3.6450, lr_0 = 8.9609e-04
Loss = 1.7248e-01, PNorm = 46.1501, GNorm = 0.6794, lr_0 = 8.9548e-04
Loss = 1.5723e-01, PNorm = 46.1777, GNorm = 1.5442, lr_0 = 8.9486e-04
Loss = 1.8117e-01, PNorm = 46.1991, GNorm = 2.5874, lr_0 = 8.9425e-04
Loss = 1.4456e-01, PNorm = 46.2212, GNorm = 0.7286, lr_0 = 8.9364e-04
Loss = 1.8201e-01, PNorm = 46.2322, GNorm = 0.6499, lr_0 = 8.9302e-04
Loss = 1.2914e-01, PNorm = 46.2518, GNorm = 0.7468, lr_0 = 8.9241e-04
Loss = 1.6659e-01, PNorm = 46.2773, GNorm = 0.7908, lr_0 = 8.9180e-04
Loss = 1.6488e-01, PNorm = 46.3010, GNorm = 2.0110, lr_0 = 8.9119e-04
Loss = 1.5294e-01, PNorm = 46.3170, GNorm = 0.6239, lr_0 = 8.9058e-04
Loss = 1.5939e-01, PNorm = 46.3325, GNorm = 2.0823, lr_0 = 8.8997e-04
Loss = 1.7205e-01, PNorm = 46.3445, GNorm = 0.7238, lr_0 = 8.8936e-04
Loss = 1.7400e-01, PNorm = 46.3596, GNorm = 2.7908, lr_0 = 8.8875e-04
Loss = 1.8505e-01, PNorm = 46.3758, GNorm = 3.9089, lr_0 = 8.8814e-04
Loss = 2.1699e-01, PNorm = 46.3986, GNorm = 1.1588, lr_0 = 8.8753e-04
Loss = 2.2097e-01, PNorm = 46.4287, GNorm = 1.9658, lr_0 = 8.8693e-04
Loss = 1.9016e-01, PNorm = 46.4494, GNorm = 2.8712, lr_0 = 8.8632e-04
Loss = 1.6528e-01, PNorm = 46.4669, GNorm = 1.4147, lr_0 = 8.8571e-04
Loss = 1.9089e-01, PNorm = 46.4886, GNorm = 1.0800, lr_0 = 8.8510e-04
Loss = 1.6091e-01, PNorm = 46.5076, GNorm = 0.9365, lr_0 = 8.8450e-04
Loss = 1.6598e-01, PNorm = 46.5274, GNorm = 1.7346, lr_0 = 8.8389e-04
Loss = 1.5133e-01, PNorm = 46.5432, GNorm = 0.7943, lr_0 = 8.8329e-04
Loss = 1.8462e-01, PNorm = 46.5613, GNorm = 2.0448, lr_0 = 8.8268e-04
Loss = 1.5820e-01, PNorm = 46.5785, GNorm = 1.6639, lr_0 = 8.8208e-04
Loss = 1.8162e-01, PNorm = 46.6013, GNorm = 0.7898, lr_0 = 8.8147e-04
Loss = 1.4474e-01, PNorm = 46.6237, GNorm = 0.7290, lr_0 = 8.8087e-04
Loss = 1.5212e-01, PNorm = 46.6486, GNorm = 0.9393, lr_0 = 8.8026e-04
Loss = 1.5839e-01, PNorm = 46.6662, GNorm = 1.1057, lr_0 = 8.7966e-04
Loss = 1.3514e-01, PNorm = 46.6833, GNorm = 0.6549, lr_0 = 8.7906e-04
Loss = 1.6257e-01, PNorm = 46.6999, GNorm = 0.9189, lr_0 = 8.7846e-04
Loss = 1.8037e-01, PNorm = 46.7195, GNorm = 0.9786, lr_0 = 8.7785e-04
Loss = 1.6561e-01, PNorm = 46.7386, GNorm = 2.1366, lr_0 = 8.7725e-04
Loss = 1.5764e-01, PNorm = 46.7460, GNorm = 3.4765, lr_0 = 8.7665e-04
Loss = 1.5860e-01, PNorm = 46.7617, GNorm = 1.8082, lr_0 = 8.7605e-04
Loss = 1.6983e-01, PNorm = 46.7880, GNorm = 2.7260, lr_0 = 8.7545e-04
Loss = 1.5438e-01, PNorm = 46.8153, GNorm = 0.9093, lr_0 = 8.7485e-04
Loss = 1.4785e-01, PNorm = 46.8365, GNorm = 1.9864, lr_0 = 8.7425e-04
Loss = 2.0025e-01, PNorm = 46.8559, GNorm = 2.9819, lr_0 = 8.7365e-04
Loss = 2.1221e-01, PNorm = 46.8822, GNorm = 3.4204, lr_0 = 8.7306e-04
Loss = 1.9027e-01, PNorm = 46.9161, GNorm = 0.9899, lr_0 = 8.7246e-04
Loss = 2.1336e-01, PNorm = 46.9575, GNorm = 0.9356, lr_0 = 8.7186e-04
Loss = 1.7837e-01, PNorm = 46.9953, GNorm = 3.1107, lr_0 = 8.7126e-04
Loss = 1.9701e-01, PNorm = 47.0264, GNorm = 2.1286, lr_0 = 8.7067e-04
Loss = 1.6863e-01, PNorm = 47.0504, GNorm = 0.8296, lr_0 = 8.7007e-04
Loss = 1.6802e-01, PNorm = 47.0727, GNorm = 1.3688, lr_0 = 8.6947e-04
Loss = 1.4421e-01, PNorm = 47.0870, GNorm = 2.2209, lr_0 = 8.6888e-04
Loss = 2.0281e-01, PNorm = 47.1106, GNorm = 0.6150, lr_0 = 8.6828e-04
Loss = 1.7993e-01, PNorm = 47.1329, GNorm = 0.8830, lr_0 = 8.6769e-04
Loss = 1.7668e-01, PNorm = 47.1666, GNorm = 1.0364, lr_0 = 8.6709e-04
Loss = 1.6459e-01, PNorm = 47.1932, GNorm = 1.3723, lr_0 = 8.6650e-04
Loss = 1.7814e-01, PNorm = 47.2173, GNorm = 1.0724, lr_0 = 8.6590e-04
Loss = 1.4419e-01, PNorm = 47.2410, GNorm = 0.7016, lr_0 = 8.6531e-04
Loss = 1.6557e-01, PNorm = 47.2589, GNorm = 2.4192, lr_0 = 8.6472e-04
Loss = 1.5070e-01, PNorm = 47.2826, GNorm = 1.0943, lr_0 = 8.6413e-04
Loss = 1.5508e-01, PNorm = 47.3050, GNorm = 1.7550, lr_0 = 8.6353e-04
Loss = 1.7140e-01, PNorm = 47.3315, GNorm = 3.0398, lr_0 = 8.6294e-04
Loss = 1.6130e-01, PNorm = 47.3581, GNorm = 1.8776, lr_0 = 8.6235e-04
Loss = 1.6046e-01, PNorm = 47.3773, GNorm = 0.9429, lr_0 = 8.6176e-04
Loss = 1.5090e-01, PNorm = 47.3976, GNorm = 2.0388, lr_0 = 8.6117e-04
Loss = 1.5767e-01, PNorm = 47.4170, GNorm = 0.7410, lr_0 = 8.6058e-04
Loss = 1.6379e-01, PNorm = 47.4374, GNorm = 1.0172, lr_0 = 8.5999e-04
Loss = 1.8635e-01, PNorm = 47.4545, GNorm = 0.8978, lr_0 = 8.5940e-04
Loss = 1.4992e-01, PNorm = 47.4709, GNorm = 0.7463, lr_0 = 8.5881e-04
Loss = 1.5362e-01, PNorm = 47.4835, GNorm = 0.6264, lr_0 = 8.5823e-04
Loss = 1.4001e-01, PNorm = 47.5048, GNorm = 1.5012, lr_0 = 8.5764e-04
Loss = 1.4584e-01, PNorm = 47.5228, GNorm = 1.2393, lr_0 = 8.5705e-04
Loss = 1.5008e-01, PNorm = 47.5370, GNorm = 1.2535, lr_0 = 8.5646e-04
Loss = 1.5276e-01, PNorm = 47.5523, GNorm = 2.4846, lr_0 = 8.5588e-04
Loss = 1.5317e-01, PNorm = 47.5717, GNorm = 1.1100, lr_0 = 8.5529e-04
Loss = 1.7619e-01, PNorm = 47.5904, GNorm = 0.9560, lr_0 = 8.5470e-04
Loss = 1.9242e-01, PNorm = 47.6156, GNorm = 1.1870, lr_0 = 8.5412e-04
Loss = 1.5633e-01, PNorm = 47.6484, GNorm = 1.9784, lr_0 = 8.5353e-04
Loss = 1.5427e-01, PNorm = 47.6730, GNorm = 1.3054, lr_0 = 8.5295e-04
Loss = 1.8007e-01, PNorm = 47.6910, GNorm = 0.9912, lr_0 = 8.5236e-04
Loss = 2.1560e-01, PNorm = 47.7104, GNorm = 2.1454, lr_0 = 8.5178e-04
Loss = 1.7810e-01, PNorm = 47.7475, GNorm = 1.6902, lr_0 = 8.5120e-04
Loss = 1.8665e-01, PNorm = 47.7726, GNorm = 3.0647, lr_0 = 8.5061e-04
Loss = 1.8910e-01, PNorm = 47.7987, GNorm = 3.3103, lr_0 = 8.5003e-04
Loss = 1.7124e-01, PNorm = 47.8304, GNorm = 2.5701, lr_0 = 8.4945e-04
Loss = 1.6441e-01, PNorm = 47.8649, GNorm = 1.3542, lr_0 = 8.4887e-04
Loss = 1.7223e-01, PNorm = 47.8862, GNorm = 1.2594, lr_0 = 8.4828e-04
Validation mae = 0.460926
Epoch 4
Loss = 1.3789e-01, PNorm = 47.8938, GNorm = 0.9752, lr_0 = 8.4770e-04
Loss = 1.4159e-01, PNorm = 47.9109, GNorm = 0.6343, lr_0 = 8.4712e-04
Loss = 1.7933e-01, PNorm = 47.9289, GNorm = 1.3983, lr_0 = 8.4654e-04
Loss = 1.3034e-01, PNorm = 47.9486, GNorm = 0.9475, lr_0 = 8.4596e-04
Loss = 1.6688e-01, PNorm = 47.9778, GNorm = 1.3484, lr_0 = 8.4538e-04
Loss = 1.6737e-01, PNorm = 48.0096, GNorm = 2.3013, lr_0 = 8.4480e-04
Loss = 1.6601e-01, PNorm = 48.0380, GNorm = 1.2025, lr_0 = 8.4423e-04
Loss = 1.9384e-01, PNorm = 48.0700, GNorm = 1.8774, lr_0 = 8.4365e-04
Loss = 1.6363e-01, PNorm = 48.0945, GNorm = 1.9394, lr_0 = 8.4307e-04
Loss = 1.3652e-01, PNorm = 48.1117, GNorm = 0.6731, lr_0 = 8.4249e-04
Loss = 1.3515e-01, PNorm = 48.1314, GNorm = 0.8009, lr_0 = 8.4191e-04
Loss = 1.2582e-01, PNorm = 48.1469, GNorm = 0.9683, lr_0 = 8.4134e-04
Loss = 1.4027e-01, PNorm = 48.1690, GNorm = 0.6952, lr_0 = 8.4076e-04
Loss = 1.4245e-01, PNorm = 48.1836, GNorm = 1.2457, lr_0 = 8.4019e-04
Loss = 1.4604e-01, PNorm = 48.1954, GNorm = 1.5349, lr_0 = 8.3961e-04
Loss = 1.3588e-01, PNorm = 48.2201, GNorm = 1.1810, lr_0 = 8.3903e-04
Loss = 1.4050e-01, PNorm = 48.2506, GNorm = 2.1221, lr_0 = 8.3846e-04
Loss = 1.4986e-01, PNorm = 48.2702, GNorm = 1.2102, lr_0 = 8.3789e-04
Loss = 1.9511e-01, PNorm = 48.2905, GNorm = 1.5150, lr_0 = 8.3731e-04
Loss = 1.6855e-01, PNorm = 48.3164, GNorm = 1.0893, lr_0 = 8.3674e-04
Loss = 1.2385e-01, PNorm = 48.3393, GNorm = 1.2820, lr_0 = 8.3616e-04
Loss = 1.3584e-01, PNorm = 48.3610, GNorm = 0.7468, lr_0 = 8.3559e-04
Loss = 1.3998e-01, PNorm = 48.3790, GNorm = 1.8630, lr_0 = 8.3502e-04
Loss = 1.6822e-01, PNorm = 48.3860, GNorm = 1.8201, lr_0 = 8.3445e-04
Loss = 1.7249e-01, PNorm = 48.4092, GNorm = 0.9862, lr_0 = 8.3388e-04
Loss = 1.6695e-01, PNorm = 48.4338, GNorm = 2.0060, lr_0 = 8.3330e-04
Loss = 1.3993e-01, PNorm = 48.4450, GNorm = 0.8105, lr_0 = 8.3273e-04
Loss = 1.5849e-01, PNorm = 48.4558, GNorm = 1.3365, lr_0 = 8.3216e-04
Loss = 1.3208e-01, PNorm = 48.4692, GNorm = 2.0806, lr_0 = 8.3159e-04
Loss = 1.5977e-01, PNorm = 48.4905, GNorm = 0.7889, lr_0 = 8.3102e-04
Loss = 1.3261e-01, PNorm = 48.5062, GNorm = 0.6710, lr_0 = 8.3045e-04
Loss = 1.4669e-01, PNorm = 48.5151, GNorm = 1.2056, lr_0 = 8.2988e-04
Loss = 1.5669e-01, PNorm = 48.5316, GNorm = 0.8127, lr_0 = 8.2932e-04
Loss = 1.5068e-01, PNorm = 48.5563, GNorm = 0.9032, lr_0 = 8.2875e-04
Loss = 1.4079e-01, PNorm = 48.5824, GNorm = 1.4417, lr_0 = 8.2818e-04
Loss = 1.4372e-01, PNorm = 48.6072, GNorm = 1.5970, lr_0 = 8.2761e-04
Loss = 1.3068e-01, PNorm = 48.6309, GNorm = 1.4156, lr_0 = 8.2705e-04
Loss = 1.5749e-01, PNorm = 48.6533, GNorm = 0.7103, lr_0 = 8.2648e-04
Loss = 1.5532e-01, PNorm = 48.6731, GNorm = 1.0080, lr_0 = 8.2591e-04
Loss = 1.5037e-01, PNorm = 48.6930, GNorm = 1.2661, lr_0 = 8.2535e-04
Loss = 1.5728e-01, PNorm = 48.7166, GNorm = 1.1085, lr_0 = 8.2478e-04
Loss = 1.5435e-01, PNorm = 48.7490, GNorm = 1.4295, lr_0 = 8.2422e-04
Loss = 1.3066e-01, PNorm = 48.7705, GNorm = 1.1822, lr_0 = 8.2365e-04
Loss = 1.5573e-01, PNorm = 48.7909, GNorm = 2.0641, lr_0 = 8.2309e-04
Loss = 1.4690e-01, PNorm = 48.8117, GNorm = 1.0430, lr_0 = 8.2252e-04
Loss = 1.3239e-01, PNorm = 48.8322, GNorm = 1.4275, lr_0 = 8.2196e-04
Loss = 1.5215e-01, PNorm = 48.8573, GNorm = 1.6776, lr_0 = 8.2140e-04
Loss = 1.6904e-01, PNorm = 48.8681, GNorm = 0.6651, lr_0 = 8.2084e-04
Loss = 1.6394e-01, PNorm = 48.8834, GNorm = 0.6647, lr_0 = 8.2027e-04
Loss = 1.5881e-01, PNorm = 48.9034, GNorm = 1.8880, lr_0 = 8.1971e-04
Loss = 1.6300e-01, PNorm = 48.9231, GNorm = 0.9237, lr_0 = 8.1915e-04
Loss = 1.7894e-01, PNorm = 48.9441, GNorm = 0.7176, lr_0 = 8.1859e-04
Loss = 1.6511e-01, PNorm = 48.9777, GNorm = 1.4826, lr_0 = 8.1803e-04
Loss = 1.5114e-01, PNorm = 49.0040, GNorm = 0.7695, lr_0 = 8.1747e-04
Loss = 1.4026e-01, PNorm = 49.0148, GNorm = 0.8934, lr_0 = 8.1691e-04
Loss = 1.4803e-01, PNorm = 49.0283, GNorm = 0.9105, lr_0 = 8.1635e-04
Loss = 1.4889e-01, PNorm = 49.0443, GNorm = 0.7036, lr_0 = 8.1579e-04
Loss = 1.4493e-01, PNorm = 49.0598, GNorm = 0.7794, lr_0 = 8.1523e-04
Loss = 1.4357e-01, PNorm = 49.0771, GNorm = 0.6128, lr_0 = 8.1467e-04
Loss = 1.5856e-01, PNorm = 49.1004, GNorm = 1.1415, lr_0 = 8.1411e-04
Loss = 1.5285e-01, PNorm = 49.1128, GNorm = 1.0784, lr_0 = 8.1355e-04
Loss = 1.3332e-01, PNorm = 49.1268, GNorm = 0.5536, lr_0 = 8.1300e-04
Loss = 1.5212e-01, PNorm = 49.1430, GNorm = 2.2096, lr_0 = 8.1244e-04
Loss = 1.5324e-01, PNorm = 49.1571, GNorm = 2.7725, lr_0 = 8.1188e-04
Loss = 1.5761e-01, PNorm = 49.1791, GNorm = 1.3826, lr_0 = 8.1133e-04
Loss = 1.3992e-01, PNorm = 49.2093, GNorm = 0.7886, lr_0 = 8.1077e-04
Loss = 1.6108e-01, PNorm = 49.2353, GNorm = 0.8929, lr_0 = 8.1022e-04
Loss = 1.6002e-01, PNorm = 49.2538, GNorm = 1.2272, lr_0 = 8.0966e-04
Loss = 1.9446e-01, PNorm = 49.2753, GNorm = 0.7024, lr_0 = 8.0911e-04
Loss = 1.6604e-01, PNorm = 49.2980, GNorm = 0.8364, lr_0 = 8.0855e-04
Loss = 1.4959e-01, PNorm = 49.3235, GNorm = 0.8551, lr_0 = 8.0800e-04
Loss = 1.3751e-01, PNorm = 49.3411, GNorm = 1.3267, lr_0 = 8.0745e-04
Loss = 1.5970e-01, PNorm = 49.3585, GNorm = 1.2746, lr_0 = 8.0689e-04
Loss = 1.6838e-01, PNorm = 49.3785, GNorm = 0.7597, lr_0 = 8.0634e-04
Loss = 1.5245e-01, PNorm = 49.4028, GNorm = 1.4254, lr_0 = 8.0579e-04
Loss = 1.4980e-01, PNorm = 49.4246, GNorm = 0.6456, lr_0 = 8.0523e-04
Loss = 1.6566e-01, PNorm = 49.4385, GNorm = 0.8457, lr_0 = 8.0468e-04
Loss = 1.4789e-01, PNorm = 49.4573, GNorm = 0.9253, lr_0 = 8.0413e-04
Loss = 1.5814e-01, PNorm = 49.4777, GNorm = 1.7166, lr_0 = 8.0358e-04
Loss = 1.3974e-01, PNorm = 49.4985, GNorm = 1.3943, lr_0 = 8.0303e-04
Loss = 1.4463e-01, PNorm = 49.5199, GNorm = 0.5031, lr_0 = 8.0248e-04
Loss = 1.5609e-01, PNorm = 49.5393, GNorm = 0.7826, lr_0 = 8.0193e-04
Loss = 1.3485e-01, PNorm = 49.5545, GNorm = 1.1598, lr_0 = 8.0138e-04
Loss = 1.3532e-01, PNorm = 49.5658, GNorm = 0.9771, lr_0 = 8.0083e-04
Loss = 1.3827e-01, PNorm = 49.5795, GNorm = 0.6948, lr_0 = 8.0028e-04
Loss = 1.5448e-01, PNorm = 49.5981, GNorm = 1.6158, lr_0 = 7.9974e-04
Loss = 1.4823e-01, PNorm = 49.6163, GNorm = 1.1357, lr_0 = 7.9919e-04
Loss = 1.4579e-01, PNorm = 49.6410, GNorm = 1.1831, lr_0 = 7.9864e-04
Loss = 1.5305e-01, PNorm = 49.6673, GNorm = 2.3200, lr_0 = 7.9809e-04
Loss = 1.5836e-01, PNorm = 49.6820, GNorm = 0.9907, lr_0 = 7.9755e-04
Loss = 1.5042e-01, PNorm = 49.7013, GNorm = 0.9894, lr_0 = 7.9700e-04
Loss = 1.5821e-01, PNorm = 49.7240, GNorm = 2.0954, lr_0 = 7.9645e-04
Loss = 1.5188e-01, PNorm = 49.7432, GNorm = 1.0565, lr_0 = 7.9591e-04
Loss = 1.5861e-01, PNorm = 49.7669, GNorm = 2.0013, lr_0 = 7.9536e-04
Loss = 1.4626e-01, PNorm = 49.7896, GNorm = 0.7143, lr_0 = 7.9482e-04
Loss = 1.5330e-01, PNorm = 49.8163, GNorm = 2.7238, lr_0 = 7.9427e-04
Loss = 1.6374e-01, PNorm = 49.8408, GNorm = 2.0422, lr_0 = 7.9373e-04
Loss = 1.4683e-01, PNorm = 49.8659, GNorm = 1.3380, lr_0 = 7.9319e-04
Loss = 1.7097e-01, PNorm = 49.8901, GNorm = 1.1822, lr_0 = 7.9264e-04
Loss = 1.6894e-01, PNorm = 49.9120, GNorm = 0.8961, lr_0 = 7.9210e-04
Loss = 1.6552e-01, PNorm = 49.9429, GNorm = 1.5107, lr_0 = 7.9156e-04
Loss = 1.4416e-01, PNorm = 49.9629, GNorm = 1.7788, lr_0 = 7.9101e-04
Loss = 1.4927e-01, PNorm = 49.9817, GNorm = 0.6752, lr_0 = 7.9047e-04
Loss = 1.4110e-01, PNorm = 50.0054, GNorm = 1.0984, lr_0 = 7.8993e-04
Loss = 1.4734e-01, PNorm = 50.0273, GNorm = 0.6547, lr_0 = 7.8939e-04
Loss = 1.2587e-01, PNorm = 50.0427, GNorm = 0.7037, lr_0 = 7.8885e-04
Loss = 1.3755e-01, PNorm = 50.0585, GNorm = 0.6300, lr_0 = 7.8831e-04
Loss = 1.5619e-01, PNorm = 50.0749, GNorm = 2.9068, lr_0 = 7.8777e-04
Loss = 1.4210e-01, PNorm = 50.0907, GNorm = 0.7424, lr_0 = 7.8723e-04
Loss = 1.4673e-01, PNorm = 50.1093, GNorm = 1.1430, lr_0 = 7.8669e-04
Loss = 1.6601e-01, PNorm = 50.1306, GNorm = 2.4176, lr_0 = 7.8615e-04
Loss = 1.5672e-01, PNorm = 50.1540, GNorm = 0.9333, lr_0 = 7.8561e-04
Loss = 1.5228e-01, PNorm = 50.1755, GNorm = 0.8608, lr_0 = 7.8507e-04
Loss = 1.5920e-01, PNorm = 50.1951, GNorm = 0.8970, lr_0 = 7.8454e-04
Loss = 1.4247e-01, PNorm = 50.2109, GNorm = 0.5487, lr_0 = 7.8400e-04
Loss = 1.4848e-01, PNorm = 50.2273, GNorm = 0.8645, lr_0 = 7.8346e-04
Loss = 1.3580e-01, PNorm = 50.2377, GNorm = 0.8005, lr_0 = 7.8293e-04
Loss = 1.3803e-01, PNorm = 50.2477, GNorm = 1.0592, lr_0 = 7.8239e-04
Loss = 1.3243e-01, PNorm = 50.2614, GNorm = 1.0893, lr_0 = 7.8185e-04
Loss = 1.4198e-01, PNorm = 50.2803, GNorm = 1.4792, lr_0 = 7.8132e-04
Validation mae = 0.437520
Epoch 5
Loss = 1.3966e-01, PNorm = 50.2989, GNorm = 1.3490, lr_0 = 7.8078e-04
Loss = 1.3882e-01, PNorm = 50.3179, GNorm = 0.5843, lr_0 = 7.8025e-04
Loss = 1.2336e-01, PNorm = 50.3390, GNorm = 0.6809, lr_0 = 7.7971e-04
Loss = 1.0954e-01, PNorm = 50.3563, GNorm = 1.3237, lr_0 = 7.7918e-04
Loss = 1.2321e-01, PNorm = 50.3695, GNorm = 2.1372, lr_0 = 7.7864e-04
Loss = 1.4906e-01, PNorm = 50.3915, GNorm = 1.5812, lr_0 = 7.7811e-04
Loss = 1.3157e-01, PNorm = 50.4142, GNorm = 0.6202, lr_0 = 7.7758e-04
Loss = 1.2798e-01, PNorm = 50.4338, GNorm = 1.1684, lr_0 = 7.7705e-04
Loss = 1.1854e-01, PNorm = 50.4510, GNorm = 0.8504, lr_0 = 7.7651e-04
Loss = 1.2702e-01, PNorm = 50.4630, GNorm = 1.6497, lr_0 = 7.7598e-04
Loss = 1.3901e-01, PNorm = 50.4787, GNorm = 2.5689, lr_0 = 7.7545e-04
Loss = 1.3476e-01, PNorm = 50.5000, GNorm = 0.7274, lr_0 = 7.7492e-04
Loss = 1.4574e-01, PNorm = 50.5158, GNorm = 0.6980, lr_0 = 7.7439e-04
Loss = 1.2195e-01, PNorm = 50.5384, GNorm = 1.8480, lr_0 = 7.7386e-04
Loss = 1.2067e-01, PNorm = 50.5616, GNorm = 1.1573, lr_0 = 7.7333e-04
Loss = 1.1632e-01, PNorm = 50.5760, GNorm = 1.1274, lr_0 = 7.7280e-04
Loss = 1.3165e-01, PNorm = 50.5911, GNorm = 0.8294, lr_0 = 7.7227e-04
Loss = 1.3135e-01, PNorm = 50.6048, GNorm = 1.2237, lr_0 = 7.7174e-04
Loss = 1.3630e-01, PNorm = 50.6167, GNorm = 2.3244, lr_0 = 7.7121e-04
Loss = 1.3485e-01, PNorm = 50.6421, GNorm = 1.5880, lr_0 = 7.7068e-04
Loss = 1.3029e-01, PNorm = 50.6675, GNorm = 0.5036, lr_0 = 7.7015e-04
Loss = 1.4941e-01, PNorm = 50.6912, GNorm = 1.1994, lr_0 = 7.6963e-04
Loss = 1.5188e-01, PNorm = 50.7230, GNorm = 1.5249, lr_0 = 7.6910e-04
Loss = 1.4544e-01, PNorm = 50.7466, GNorm = 1.0843, lr_0 = 7.6857e-04
Loss = 1.3370e-01, PNorm = 50.7670, GNorm = 1.6721, lr_0 = 7.6805e-04
Loss = 1.3692e-01, PNorm = 50.7805, GNorm = 1.9782, lr_0 = 7.6752e-04
Loss = 1.4461e-01, PNorm = 50.7993, GNorm = 1.6495, lr_0 = 7.6699e-04
Loss = 1.4019e-01, PNorm = 50.8248, GNorm = 1.0869, lr_0 = 7.6647e-04
Loss = 1.3031e-01, PNorm = 50.8417, GNorm = 1.0364, lr_0 = 7.6594e-04
Loss = 1.3788e-01, PNorm = 50.8645, GNorm = 0.6417, lr_0 = 7.6542e-04
Loss = 1.4414e-01, PNorm = 50.8816, GNorm = 1.0523, lr_0 = 7.6489e-04
Loss = 1.3773e-01, PNorm = 50.8994, GNorm = 1.5822, lr_0 = 7.6437e-04
Loss = 1.1180e-01, PNorm = 50.9197, GNorm = 0.5997, lr_0 = 7.6385e-04
Loss = 1.3386e-01, PNorm = 50.9397, GNorm = 0.5515, lr_0 = 7.6332e-04
Loss = 1.6417e-01, PNorm = 50.9589, GNorm = 2.5410, lr_0 = 7.6280e-04
Loss = 1.3638e-01, PNorm = 50.9766, GNorm = 1.2945, lr_0 = 7.6228e-04
Loss = 1.4044e-01, PNorm = 50.9904, GNorm = 0.8964, lr_0 = 7.6176e-04
Loss = 1.5391e-01, PNorm = 51.0034, GNorm = 1.7898, lr_0 = 7.6123e-04
Loss = 1.4823e-01, PNorm = 51.0277, GNorm = 1.3353, lr_0 = 7.6071e-04
Loss = 1.2372e-01, PNorm = 51.0479, GNorm = 0.6149, lr_0 = 7.6019e-04
Loss = 1.3440e-01, PNorm = 51.0653, GNorm = 0.7323, lr_0 = 7.5967e-04
Loss = 1.3126e-01, PNorm = 51.0836, GNorm = 0.6471, lr_0 = 7.5915e-04
Loss = 1.2500e-01, PNorm = 51.0976, GNorm = 0.6048, lr_0 = 7.5863e-04
Loss = 1.3142e-01, PNorm = 51.1193, GNorm = 1.3776, lr_0 = 7.5811e-04
Loss = 1.3051e-01, PNorm = 51.1422, GNorm = 0.6933, lr_0 = 7.5759e-04
Loss = 1.3204e-01, PNorm = 51.1580, GNorm = 0.6944, lr_0 = 7.5707e-04
Loss = 1.2849e-01, PNorm = 51.1760, GNorm = 1.8721, lr_0 = 7.5655e-04
Loss = 1.3702e-01, PNorm = 51.1947, GNorm = 1.1728, lr_0 = 7.5603e-04
Loss = 1.5389e-01, PNorm = 51.2153, GNorm = 1.4053, lr_0 = 7.5552e-04
Loss = 1.4045e-01, PNorm = 51.2334, GNorm = 0.7871, lr_0 = 7.5500e-04
Loss = 1.4389e-01, PNorm = 51.2501, GNorm = 0.7806, lr_0 = 7.5448e-04
Loss = 1.2354e-01, PNorm = 51.2747, GNorm = 0.8517, lr_0 = 7.5397e-04
Loss = 1.3806e-01, PNorm = 51.3006, GNorm = 0.8057, lr_0 = 7.5345e-04
Loss = 1.3888e-01, PNorm = 51.3181, GNorm = 1.1751, lr_0 = 7.5293e-04
Loss = 1.3411e-01, PNorm = 51.3352, GNorm = 2.0283, lr_0 = 7.5242e-04
Loss = 1.2632e-01, PNorm = 51.3534, GNorm = 0.8846, lr_0 = 7.5190e-04
Loss = 1.3180e-01, PNorm = 51.3684, GNorm = 0.6712, lr_0 = 7.5139e-04
Loss = 1.3236e-01, PNorm = 51.3815, GNorm = 1.7707, lr_0 = 7.5087e-04
Loss = 1.5500e-01, PNorm = 51.3977, GNorm = 0.6873, lr_0 = 7.5036e-04
Loss = 1.4065e-01, PNorm = 51.4129, GNorm = 1.3859, lr_0 = 7.4984e-04
Loss = 1.4192e-01, PNorm = 51.4333, GNorm = 1.2667, lr_0 = 7.4933e-04
Loss = 1.4267e-01, PNorm = 51.4523, GNorm = 0.9857, lr_0 = 7.4882e-04
Loss = 1.3739e-01, PNorm = 51.4703, GNorm = 0.7548, lr_0 = 7.4830e-04
Loss = 1.3582e-01, PNorm = 51.4929, GNorm = 1.1296, lr_0 = 7.4779e-04
Loss = 1.4961e-01, PNorm = 51.5085, GNorm = 0.9221, lr_0 = 7.4728e-04
Loss = 1.6501e-01, PNorm = 51.5233, GNorm = 1.2531, lr_0 = 7.4677e-04
Loss = 1.4997e-01, PNorm = 51.5461, GNorm = 1.7732, lr_0 = 7.4625e-04
Loss = 1.4879e-01, PNorm = 51.5707, GNorm = 0.7713, lr_0 = 7.4574e-04
Loss = 1.3486e-01, PNorm = 51.5904, GNorm = 1.0323, lr_0 = 7.4523e-04
Loss = 1.5879e-01, PNorm = 51.6049, GNorm = 0.8380, lr_0 = 7.4472e-04
Loss = 1.3849e-01, PNorm = 51.6248, GNorm = 0.6195, lr_0 = 7.4421e-04
Loss = 1.2122e-01, PNorm = 51.6394, GNorm = 0.7736, lr_0 = 7.4370e-04
Loss = 1.4111e-01, PNorm = 51.6543, GNorm = 1.1202, lr_0 = 7.4319e-04
Loss = 1.4074e-01, PNorm = 51.6738, GNorm = 2.4611, lr_0 = 7.4268e-04
Loss = 1.2975e-01, PNorm = 51.6958, GNorm = 0.5472, lr_0 = 7.4217e-04
Loss = 1.3431e-01, PNorm = 51.7147, GNorm = 1.1140, lr_0 = 7.4167e-04
Loss = 1.2584e-01, PNorm = 51.7334, GNorm = 0.6002, lr_0 = 7.4116e-04
Loss = 1.4578e-01, PNorm = 51.7546, GNorm = 1.1885, lr_0 = 7.4065e-04
Loss = 1.4686e-01, PNorm = 51.7711, GNorm = 1.0821, lr_0 = 7.4014e-04
Loss = 1.3330e-01, PNorm = 51.7856, GNorm = 1.3131, lr_0 = 7.3964e-04
Loss = 1.3185e-01, PNorm = 51.8100, GNorm = 0.7001, lr_0 = 7.3913e-04
Loss = 1.3055e-01, PNorm = 51.8319, GNorm = 0.7968, lr_0 = 7.3862e-04
Loss = 1.4458e-01, PNorm = 51.8471, GNorm = 0.7754, lr_0 = 7.3812e-04
Loss = 1.4807e-01, PNorm = 51.8634, GNorm = 2.1404, lr_0 = 7.3761e-04
Loss = 1.5605e-01, PNorm = 51.8894, GNorm = 0.6269, lr_0 = 7.3711e-04
Loss = 1.7490e-01, PNorm = 51.9103, GNorm = 1.2180, lr_0 = 7.3660e-04
Loss = 1.4218e-01, PNorm = 51.9289, GNorm = 0.7678, lr_0 = 7.3610e-04
Loss = 1.3094e-01, PNorm = 51.9500, GNorm = 0.9415, lr_0 = 7.3559e-04
Loss = 1.4394e-01, PNorm = 51.9685, GNorm = 0.7759, lr_0 = 7.3509e-04
Loss = 1.2723e-01, PNorm = 51.9899, GNorm = 2.8324, lr_0 = 7.3458e-04
Loss = 1.2623e-01, PNorm = 52.0113, GNorm = 1.4332, lr_0 = 7.3408e-04
Loss = 1.4818e-01, PNorm = 52.0319, GNorm = 1.0964, lr_0 = 7.3358e-04
Loss = 1.2865e-01, PNorm = 52.0534, GNorm = 0.7782, lr_0 = 7.3308e-04
Loss = 1.3458e-01, PNorm = 52.0724, GNorm = 0.5520, lr_0 = 7.3257e-04
Loss = 1.2215e-01, PNorm = 52.0839, GNorm = 1.1231, lr_0 = 7.3207e-04
Loss = 1.2959e-01, PNorm = 52.0922, GNorm = 0.6524, lr_0 = 7.3157e-04
Loss = 1.2725e-01, PNorm = 52.1079, GNorm = 0.9134, lr_0 = 7.3107e-04
Loss = 1.3008e-01, PNorm = 52.1341, GNorm = 0.8941, lr_0 = 7.3057e-04
Loss = 1.4668e-01, PNorm = 52.1534, GNorm = 2.8416, lr_0 = 7.3007e-04
Loss = 1.3158e-01, PNorm = 52.1777, GNorm = 1.7368, lr_0 = 7.2957e-04
Loss = 1.3193e-01, PNorm = 52.1956, GNorm = 1.0070, lr_0 = 7.2907e-04
Loss = 1.2778e-01, PNorm = 52.2169, GNorm = 0.9387, lr_0 = 7.2857e-04
Loss = 1.3781e-01, PNorm = 52.2358, GNorm = 1.5193, lr_0 = 7.2807e-04
Loss = 1.3911e-01, PNorm = 52.2477, GNorm = 1.2024, lr_0 = 7.2757e-04
Loss = 1.1175e-01, PNorm = 52.2600, GNorm = 0.6435, lr_0 = 7.2707e-04
Loss = 1.4579e-01, PNorm = 52.2721, GNorm = 0.8156, lr_0 = 7.2657e-04
Loss = 1.3208e-01, PNorm = 52.2866, GNorm = 0.7608, lr_0 = 7.2608e-04
Loss = 1.5777e-01, PNorm = 52.3048, GNorm = 0.6902, lr_0 = 7.2558e-04
Loss = 1.4709e-01, PNorm = 52.3277, GNorm = 0.7613, lr_0 = 7.2508e-04
Loss = 1.3891e-01, PNorm = 52.3517, GNorm = 0.7710, lr_0 = 7.2458e-04
Loss = 1.3871e-01, PNorm = 52.3718, GNorm = 2.0435, lr_0 = 7.2409e-04
Loss = 1.5100e-01, PNorm = 52.3898, GNorm = 1.0700, lr_0 = 7.2359e-04
Loss = 1.2970e-01, PNorm = 52.4120, GNorm = 0.8051, lr_0 = 7.2310e-04
Loss = 1.2786e-01, PNorm = 52.4309, GNorm = 0.6812, lr_0 = 7.2260e-04
Loss = 1.1986e-01, PNorm = 52.4411, GNorm = 0.9359, lr_0 = 7.2211e-04
Loss = 1.3429e-01, PNorm = 52.4540, GNorm = 1.3991, lr_0 = 7.2161e-04
Loss = 1.3033e-01, PNorm = 52.4610, GNorm = 0.7295, lr_0 = 7.2112e-04
Loss = 1.1891e-01, PNorm = 52.4734, GNorm = 1.4058, lr_0 = 7.2062e-04
Loss = 1.5836e-01, PNorm = 52.4926, GNorm = 1.5986, lr_0 = 7.2013e-04
Loss = 1.4266e-01, PNorm = 52.5187, GNorm = 0.7985, lr_0 = 7.1964e-04
Validation mae = 0.450202
Epoch 6
Loss = 1.2172e-01, PNorm = 52.5409, GNorm = 0.5587, lr_0 = 7.1914e-04
Loss = 1.1624e-01, PNorm = 52.5591, GNorm = 0.6520, lr_0 = 7.1865e-04
Loss = 1.2253e-01, PNorm = 52.5780, GNorm = 1.1654, lr_0 = 7.1816e-04
Loss = 1.2164e-01, PNorm = 52.5995, GNorm = 1.1105, lr_0 = 7.1767e-04
Loss = 1.3693e-01, PNorm = 52.6151, GNorm = 1.8780, lr_0 = 7.1717e-04
Loss = 1.3838e-01, PNorm = 52.6350, GNorm = 1.1961, lr_0 = 7.1668e-04
Loss = 1.1219e-01, PNorm = 52.6521, GNorm = 0.7535, lr_0 = 7.1619e-04
Loss = 1.2108e-01, PNorm = 52.6708, GNorm = 0.6805, lr_0 = 7.1570e-04
Loss = 1.1127e-01, PNorm = 52.6843, GNorm = 0.5815, lr_0 = 7.1521e-04
Loss = 1.1348e-01, PNorm = 52.6951, GNorm = 1.9857, lr_0 = 7.1472e-04
Loss = 1.1836e-01, PNorm = 52.7141, GNorm = 0.8614, lr_0 = 7.1423e-04
Loss = 1.1519e-01, PNorm = 52.7297, GNorm = 0.9486, lr_0 = 7.1374e-04
Loss = 1.1068e-01, PNorm = 52.7443, GNorm = 0.8044, lr_0 = 7.1325e-04
Loss = 1.2545e-01, PNorm = 52.7662, GNorm = 1.2064, lr_0 = 7.1277e-04
Loss = 1.1559e-01, PNorm = 52.7866, GNorm = 1.1528, lr_0 = 7.1228e-04
Loss = 1.3429e-01, PNorm = 52.8113, GNorm = 1.1222, lr_0 = 7.1179e-04
Loss = 1.2345e-01, PNorm = 52.8315, GNorm = 1.0655, lr_0 = 7.1130e-04
Loss = 1.1364e-01, PNorm = 52.8438, GNorm = 1.5784, lr_0 = 7.1081e-04
Loss = 1.1231e-01, PNorm = 52.8602, GNorm = 0.3799, lr_0 = 7.1033e-04
Loss = 1.5002e-01, PNorm = 52.8852, GNorm = 0.8153, lr_0 = 7.0984e-04
Loss = 1.1313e-01, PNorm = 52.9007, GNorm = 0.5714, lr_0 = 7.0935e-04
Loss = 1.1739e-01, PNorm = 52.9243, GNorm = 0.4534, lr_0 = 7.0887e-04
Loss = 1.1782e-01, PNorm = 52.9433, GNorm = 1.7110, lr_0 = 7.0838e-04
Loss = 1.2056e-01, PNorm = 52.9613, GNorm = 0.8475, lr_0 = 7.0790e-04
Loss = 1.2472e-01, PNorm = 52.9759, GNorm = 1.1277, lr_0 = 7.0741e-04
Loss = 1.3351e-01, PNorm = 52.9916, GNorm = 1.8019, lr_0 = 7.0693e-04
Loss = 8.8867e-02, PNorm = 53.0061, GNorm = 0.4904, lr_0 = 7.0644e-04
Loss = 1.2489e-01, PNorm = 53.0169, GNorm = 0.6408, lr_0 = 7.0596e-04
Loss = 1.2238e-01, PNorm = 53.0333, GNorm = 1.9589, lr_0 = 7.0548e-04
Loss = 1.1164e-01, PNorm = 53.0540, GNorm = 0.8154, lr_0 = 7.0499e-04
Loss = 1.2415e-01, PNorm = 53.0739, GNorm = 1.0478, lr_0 = 7.0451e-04
Loss = 1.3946e-01, PNorm = 53.0896, GNorm = 1.0564, lr_0 = 7.0403e-04
Loss = 1.4908e-01, PNorm = 53.1093, GNorm = 1.1438, lr_0 = 7.0354e-04
Loss = 1.1139e-01, PNorm = 53.1304, GNorm = 0.6340, lr_0 = 7.0306e-04
Loss = 1.1663e-01, PNorm = 53.1499, GNorm = 0.6283, lr_0 = 7.0258e-04
Loss = 1.3688e-01, PNorm = 53.1623, GNorm = 1.0318, lr_0 = 7.0210e-04
Loss = 1.0408e-01, PNorm = 53.1729, GNorm = 0.8804, lr_0 = 7.0162e-04
Loss = 1.4275e-01, PNorm = 53.1906, GNorm = 0.9382, lr_0 = 7.0114e-04
Loss = 1.6378e-01, PNorm = 53.2142, GNorm = 1.9885, lr_0 = 7.0066e-04
Loss = 1.6222e-01, PNorm = 53.2413, GNorm = 0.7065, lr_0 = 7.0018e-04
Loss = 1.4312e-01, PNorm = 53.2584, GNorm = 1.2540, lr_0 = 6.9970e-04
Loss = 1.3356e-01, PNorm = 53.2755, GNorm = 0.8903, lr_0 = 6.9922e-04
Loss = 1.4102e-01, PNorm = 53.3023, GNorm = 1.8784, lr_0 = 6.9874e-04
Loss = 1.3420e-01, PNorm = 53.3314, GNorm = 0.4560, lr_0 = 6.9826e-04
Loss = 1.0202e-01, PNorm = 53.3517, GNorm = 0.9482, lr_0 = 6.9778e-04
Loss = 1.1131e-01, PNorm = 53.3670, GNorm = 0.6727, lr_0 = 6.9730e-04
Loss = 1.1407e-01, PNorm = 53.3827, GNorm = 0.5629, lr_0 = 6.9683e-04
Loss = 1.1093e-01, PNorm = 53.3956, GNorm = 1.0618, lr_0 = 6.9635e-04
Loss = 1.0821e-01, PNorm = 53.4112, GNorm = 1.1418, lr_0 = 6.9587e-04
Loss = 1.1613e-01, PNorm = 53.4252, GNorm = 1.0233, lr_0 = 6.9540e-04
Loss = 1.2386e-01, PNorm = 53.4372, GNorm = 0.8492, lr_0 = 6.9492e-04
Loss = 1.2777e-01, PNorm = 53.4510, GNorm = 0.7713, lr_0 = 6.9444e-04
Loss = 1.4996e-01, PNorm = 53.4672, GNorm = 0.8520, lr_0 = 6.9397e-04
Loss = 1.6720e-01, PNorm = 53.4845, GNorm = 1.2525, lr_0 = 6.9349e-04
Loss = 1.2214e-01, PNorm = 53.5021, GNorm = 1.3613, lr_0 = 6.9302e-04
Loss = 1.3142e-01, PNorm = 53.5125, GNorm = 0.6484, lr_0 = 6.9254e-04
Loss = 1.2436e-01, PNorm = 53.5311, GNorm = 1.5780, lr_0 = 6.9207e-04
Loss = 1.3747e-01, PNorm = 53.5571, GNorm = 0.8199, lr_0 = 6.9159e-04
Loss = 1.2403e-01, PNorm = 53.5841, GNorm = 0.9500, lr_0 = 6.9112e-04
Loss = 1.1243e-01, PNorm = 53.6004, GNorm = 0.8493, lr_0 = 6.9065e-04
Loss = 1.3675e-01, PNorm = 53.6133, GNorm = 2.1511, lr_0 = 6.9017e-04
Loss = 1.2106e-01, PNorm = 53.6343, GNorm = 1.4690, lr_0 = 6.8970e-04
Loss = 1.2867e-01, PNorm = 53.6548, GNorm = 1.0575, lr_0 = 6.8923e-04
Loss = 1.2159e-01, PNorm = 53.6732, GNorm = 0.8178, lr_0 = 6.8876e-04
Loss = 1.3290e-01, PNorm = 53.6919, GNorm = 1.2108, lr_0 = 6.8828e-04
Loss = 1.1252e-01, PNorm = 53.7128, GNorm = 0.8915, lr_0 = 6.8781e-04
Loss = 1.1904e-01, PNorm = 53.7309, GNorm = 0.8037, lr_0 = 6.8734e-04
Loss = 1.2727e-01, PNorm = 53.7392, GNorm = 1.2854, lr_0 = 6.8687e-04
Loss = 1.3552e-01, PNorm = 53.7537, GNorm = 1.0115, lr_0 = 6.8640e-04
Loss = 1.3075e-01, PNorm = 53.7790, GNorm = 1.4533, lr_0 = 6.8593e-04
Loss = 1.2611e-01, PNorm = 53.7947, GNorm = 1.0147, lr_0 = 6.8546e-04
Loss = 1.3567e-01, PNorm = 53.8100, GNorm = 0.9407, lr_0 = 6.8499e-04
Loss = 1.2600e-01, PNorm = 53.8280, GNorm = 1.9293, lr_0 = 6.8452e-04
Loss = 1.3346e-01, PNorm = 53.8507, GNorm = 1.6332, lr_0 = 6.8405e-04
Loss = 1.1203e-01, PNorm = 53.8714, GNorm = 0.5222, lr_0 = 6.8358e-04
Loss = 1.0186e-01, PNorm = 53.8852, GNorm = 0.8559, lr_0 = 6.8312e-04
Loss = 1.0988e-01, PNorm = 53.9039, GNorm = 0.6097, lr_0 = 6.8265e-04
Loss = 1.2138e-01, PNorm = 53.9223, GNorm = 0.9948, lr_0 = 6.8218e-04
Loss = 1.2536e-01, PNorm = 53.9368, GNorm = 1.1694, lr_0 = 6.8171e-04
Loss = 1.2900e-01, PNorm = 53.9544, GNorm = 0.7350, lr_0 = 6.8125e-04
Loss = 1.3523e-01, PNorm = 53.9792, GNorm = 1.3539, lr_0 = 6.8078e-04
Loss = 1.0582e-01, PNorm = 53.9939, GNorm = 0.9611, lr_0 = 6.8031e-04
Loss = 1.1991e-01, PNorm = 54.0080, GNorm = 0.8620, lr_0 = 6.7985e-04
Loss = 1.0616e-01, PNorm = 54.0227, GNorm = 0.8334, lr_0 = 6.7938e-04
Loss = 1.4197e-01, PNorm = 54.0409, GNorm = 1.5540, lr_0 = 6.7892e-04
Loss = 1.2118e-01, PNorm = 54.0576, GNorm = 1.0808, lr_0 = 6.7845e-04
Loss = 1.1721e-01, PNorm = 54.0720, GNorm = 1.4700, lr_0 = 6.7799e-04
Loss = 1.1651e-01, PNorm = 54.0848, GNorm = 0.7177, lr_0 = 6.7752e-04
Loss = 1.3167e-01, PNorm = 54.1013, GNorm = 0.4323, lr_0 = 6.7706e-04
Loss = 1.3035e-01, PNorm = 54.1183, GNorm = 1.1469, lr_0 = 6.7659e-04
Loss = 1.4299e-01, PNorm = 54.1412, GNorm = 1.3029, lr_0 = 6.7613e-04
Loss = 1.2919e-01, PNorm = 54.1595, GNorm = 0.7682, lr_0 = 6.7567e-04
Loss = 1.2449e-01, PNorm = 54.1723, GNorm = 0.5741, lr_0 = 6.7520e-04
Loss = 1.2471e-01, PNorm = 54.1849, GNorm = 0.9596, lr_0 = 6.7474e-04
Loss = 1.2241e-01, PNorm = 54.1954, GNorm = 0.7318, lr_0 = 6.7428e-04
Loss = 1.3365e-01, PNorm = 54.2054, GNorm = 1.4187, lr_0 = 6.7382e-04
Loss = 1.4267e-01, PNorm = 54.2171, GNorm = 1.7362, lr_0 = 6.7335e-04
Loss = 1.5017e-01, PNorm = 54.2341, GNorm = 1.2335, lr_0 = 6.7289e-04
Loss = 1.6717e-01, PNorm = 54.2556, GNorm = 1.2617, lr_0 = 6.7243e-04
Loss = 1.1860e-01, PNorm = 54.2815, GNorm = 1.3396, lr_0 = 6.7197e-04
Loss = 1.2597e-01, PNorm = 54.3011, GNorm = 1.5285, lr_0 = 6.7151e-04
Loss = 1.3405e-01, PNorm = 54.3200, GNorm = 0.7312, lr_0 = 6.7105e-04
Loss = 1.4012e-01, PNorm = 54.3402, GNorm = 0.9023, lr_0 = 6.7059e-04
Loss = 1.3473e-01, PNorm = 54.3631, GNorm = 0.5238, lr_0 = 6.7013e-04
Loss = 1.3794e-01, PNorm = 54.3843, GNorm = 1.3740, lr_0 = 6.6967e-04
Loss = 1.2355e-01, PNorm = 54.3974, GNorm = 0.9340, lr_0 = 6.6921e-04
Loss = 1.2031e-01, PNorm = 54.4073, GNorm = 0.8405, lr_0 = 6.6876e-04
Loss = 1.1876e-01, PNorm = 54.4188, GNorm = 0.6900, lr_0 = 6.6830e-04
Loss = 1.0992e-01, PNorm = 54.4310, GNorm = 0.5236, lr_0 = 6.6784e-04
Loss = 1.1325e-01, PNorm = 54.4431, GNorm = 1.0480, lr_0 = 6.6738e-04
Loss = 1.1824e-01, PNorm = 54.4535, GNorm = 0.6978, lr_0 = 6.6693e-04
Loss = 1.1120e-01, PNorm = 54.4699, GNorm = 0.9189, lr_0 = 6.6647e-04
Loss = 1.3791e-01, PNorm = 54.4865, GNorm = 0.8201, lr_0 = 6.6601e-04
Loss = 1.2318e-01, PNorm = 54.5025, GNorm = 0.9998, lr_0 = 6.6556e-04
Loss = 1.1514e-01, PNorm = 54.5126, GNorm = 0.9883, lr_0 = 6.6510e-04
Loss = 1.2430e-01, PNorm = 54.5235, GNorm = 1.1096, lr_0 = 6.6464e-04
Loss = 1.5020e-01, PNorm = 54.5410, GNorm = 0.6829, lr_0 = 6.6419e-04
Loss = 1.3452e-01, PNorm = 54.5576, GNorm = 1.1977, lr_0 = 6.6373e-04
Loss = 1.2040e-01, PNorm = 54.5676, GNorm = 1.2912, lr_0 = 6.6328e-04
Loss = 1.2516e-01, PNorm = 54.5807, GNorm = 0.5267, lr_0 = 6.6282e-04
Validation mae = 0.435693
Epoch 7
Loss = 1.1944e-01, PNorm = 54.5948, GNorm = 0.7517, lr_0 = 6.6237e-04
Loss = 1.1971e-01, PNorm = 54.6098, GNorm = 1.1683, lr_0 = 6.6192e-04
Loss = 1.2550e-01, PNorm = 54.6248, GNorm = 0.5955, lr_0 = 6.6146e-04
Loss = 1.1999e-01, PNorm = 54.6485, GNorm = 0.6169, lr_0 = 6.6101e-04
Loss = 1.2411e-01, PNorm = 54.6734, GNorm = 0.8784, lr_0 = 6.6056e-04
Loss = 1.0051e-01, PNorm = 54.6923, GNorm = 0.5028, lr_0 = 6.6011e-04
Loss = 1.1487e-01, PNorm = 54.7067, GNorm = 0.6833, lr_0 = 6.5965e-04
Loss = 1.0363e-01, PNorm = 54.7182, GNorm = 0.5801, lr_0 = 6.5920e-04
Loss = 1.2274e-01, PNorm = 54.7301, GNorm = 1.6259, lr_0 = 6.5875e-04
Loss = 1.0873e-01, PNorm = 54.7510, GNorm = 0.5324, lr_0 = 6.5830e-04
Loss = 1.1166e-01, PNorm = 54.7691, GNorm = 0.5696, lr_0 = 6.5785e-04
Loss = 1.1057e-01, PNorm = 54.7854, GNorm = 0.9634, lr_0 = 6.5740e-04
Loss = 1.1189e-01, PNorm = 54.8040, GNorm = 1.5373, lr_0 = 6.5695e-04
Loss = 1.2221e-01, PNorm = 54.8222, GNorm = 0.5988, lr_0 = 6.5650e-04
Loss = 1.1028e-01, PNorm = 54.8359, GNorm = 1.0333, lr_0 = 6.5605e-04
Loss = 1.2488e-01, PNorm = 54.8486, GNorm = 0.8179, lr_0 = 6.5560e-04
Loss = 1.2158e-01, PNorm = 54.8651, GNorm = 0.5964, lr_0 = 6.5515e-04
Loss = 1.1807e-01, PNorm = 54.8839, GNorm = 0.9615, lr_0 = 6.5470e-04
Loss = 1.0940e-01, PNorm = 54.8982, GNorm = 1.3142, lr_0 = 6.5425e-04
Loss = 1.1904e-01, PNorm = 54.9172, GNorm = 0.7127, lr_0 = 6.5380e-04
Loss = 1.1194e-01, PNorm = 54.9330, GNorm = 0.7939, lr_0 = 6.5335e-04
Loss = 1.1986e-01, PNorm = 54.9537, GNorm = 0.8257, lr_0 = 6.5291e-04
Loss = 1.0979e-01, PNorm = 54.9741, GNorm = 0.7920, lr_0 = 6.5246e-04
Loss = 1.3338e-01, PNorm = 54.9926, GNorm = 1.4130, lr_0 = 6.5201e-04
Loss = 1.2420e-01, PNorm = 55.0120, GNorm = 0.7454, lr_0 = 6.5157e-04
Loss = 1.2063e-01, PNorm = 55.0312, GNorm = 0.6077, lr_0 = 6.5112e-04
Loss = 1.2384e-01, PNorm = 55.0509, GNorm = 1.8827, lr_0 = 6.5067e-04
Loss = 1.1203e-01, PNorm = 55.0661, GNorm = 0.8821, lr_0 = 6.5023e-04
Loss = 1.1831e-01, PNorm = 55.0896, GNorm = 1.4344, lr_0 = 6.4978e-04
Loss = 1.1263e-01, PNorm = 55.1081, GNorm = 0.5607, lr_0 = 6.4934e-04
Loss = 1.3314e-01, PNorm = 55.1238, GNorm = 1.2261, lr_0 = 6.4889e-04
Loss = 1.0741e-01, PNorm = 55.1366, GNorm = 0.7607, lr_0 = 6.4845e-04
Loss = 1.0431e-01, PNorm = 55.1477, GNorm = 0.5645, lr_0 = 6.4800e-04
Loss = 1.1508e-01, PNorm = 55.1633, GNorm = 0.5468, lr_0 = 6.4756e-04
Loss = 1.1197e-01, PNorm = 55.1813, GNorm = 0.9827, lr_0 = 6.4712e-04
Loss = 1.5506e-01, PNorm = 55.2015, GNorm = 0.8007, lr_0 = 6.4667e-04
Loss = 1.2643e-01, PNorm = 55.2245, GNorm = 1.8853, lr_0 = 6.4623e-04
Loss = 1.1174e-01, PNorm = 55.2411, GNorm = 0.8100, lr_0 = 6.4579e-04
Loss = 1.1444e-01, PNorm = 55.2600, GNorm = 1.5148, lr_0 = 6.4534e-04
Loss = 1.3035e-01, PNorm = 55.2765, GNorm = 1.3327, lr_0 = 6.4490e-04
Loss = 1.2189e-01, PNorm = 55.2932, GNorm = 0.9990, lr_0 = 6.4446e-04
Loss = 1.1871e-01, PNorm = 55.3112, GNorm = 0.6128, lr_0 = 6.4402e-04
Loss = 1.2119e-01, PNorm = 55.3320, GNorm = 0.7465, lr_0 = 6.4358e-04
Loss = 1.0111e-01, PNorm = 55.3517, GNorm = 0.6567, lr_0 = 6.4314e-04
Loss = 1.1365e-01, PNorm = 55.3707, GNorm = 0.9839, lr_0 = 6.4270e-04
Loss = 1.2365e-01, PNorm = 55.3881, GNorm = 1.7507, lr_0 = 6.4226e-04
Loss = 1.2540e-01, PNorm = 55.4127, GNorm = 1.4711, lr_0 = 6.4182e-04
Loss = 1.2059e-01, PNorm = 55.4262, GNorm = 0.6276, lr_0 = 6.4138e-04
Loss = 1.1576e-01, PNorm = 55.4382, GNorm = 0.5534, lr_0 = 6.4094e-04
Loss = 1.1964e-01, PNorm = 55.4584, GNorm = 0.8896, lr_0 = 6.4050e-04
Loss = 1.0936e-01, PNorm = 55.4840, GNorm = 1.9464, lr_0 = 6.4006e-04
Loss = 1.1988e-01, PNorm = 55.5013, GNorm = 1.3239, lr_0 = 6.3962e-04
Loss = 1.3798e-01, PNorm = 55.5160, GNorm = 0.6701, lr_0 = 6.3918e-04
Loss = 1.1951e-01, PNorm = 55.5369, GNorm = 1.2155, lr_0 = 6.3874e-04
Loss = 1.0624e-01, PNorm = 55.5514, GNorm = 0.6459, lr_0 = 6.3831e-04
Loss = 1.1256e-01, PNorm = 55.5620, GNorm = 0.7240, lr_0 = 6.3787e-04
Loss = 1.2954e-01, PNorm = 55.5788, GNorm = 1.1712, lr_0 = 6.3743e-04
Loss = 1.2507e-01, PNorm = 55.5979, GNorm = 0.7511, lr_0 = 6.3700e-04
Loss = 1.2626e-01, PNorm = 55.6114, GNorm = 0.6919, lr_0 = 6.3656e-04
Loss = 1.0825e-01, PNorm = 55.6217, GNorm = 0.7833, lr_0 = 6.3612e-04
Loss = 1.0465e-01, PNorm = 55.6361, GNorm = 1.2722, lr_0 = 6.3569e-04
Loss = 1.0852e-01, PNorm = 55.6534, GNorm = 0.9614, lr_0 = 6.3525e-04
Loss = 1.3082e-01, PNorm = 55.6680, GNorm = 1.0916, lr_0 = 6.3482e-04
Loss = 1.1151e-01, PNorm = 55.6759, GNorm = 1.3574, lr_0 = 6.3438e-04
Loss = 1.2871e-01, PNorm = 55.6857, GNorm = 0.8918, lr_0 = 6.3395e-04
Loss = 1.1595e-01, PNorm = 55.7019, GNorm = 1.3556, lr_0 = 6.3351e-04
Loss = 1.1367e-01, PNorm = 55.7211, GNorm = 0.6532, lr_0 = 6.3308e-04
Loss = 9.5946e-02, PNorm = 55.7384, GNorm = 0.7248, lr_0 = 6.3265e-04
Loss = 1.2067e-01, PNorm = 55.7472, GNorm = 0.6409, lr_0 = 6.3221e-04
Loss = 1.0699e-01, PNorm = 55.7551, GNorm = 0.9803, lr_0 = 6.3178e-04
Loss = 1.2862e-01, PNorm = 55.7654, GNorm = 0.6067, lr_0 = 6.3135e-04
Loss = 1.2173e-01, PNorm = 55.7798, GNorm = 1.1786, lr_0 = 6.3091e-04
Loss = 1.2155e-01, PNorm = 55.7978, GNorm = 1.4855, lr_0 = 6.3048e-04
Loss = 1.0879e-01, PNorm = 55.8142, GNorm = 1.4497, lr_0 = 6.3005e-04
Loss = 1.1211e-01, PNorm = 55.8290, GNorm = 0.5870, lr_0 = 6.2962e-04
Loss = 1.0329e-01, PNorm = 55.8450, GNorm = 0.6442, lr_0 = 6.2919e-04
Loss = 1.0596e-01, PNorm = 55.8579, GNorm = 0.8762, lr_0 = 6.2876e-04
Loss = 1.2171e-01, PNorm = 55.8718, GNorm = 1.0003, lr_0 = 6.2833e-04
Loss = 1.0385e-01, PNorm = 55.8859, GNorm = 0.6061, lr_0 = 6.2789e-04
Loss = 9.9413e-02, PNorm = 55.9007, GNorm = 1.1031, lr_0 = 6.2746e-04
Loss = 1.1090e-01, PNorm = 55.9116, GNorm = 0.5958, lr_0 = 6.2703e-04
Loss = 1.1635e-01, PNorm = 55.9254, GNorm = 0.6808, lr_0 = 6.2661e-04
Loss = 1.0745e-01, PNorm = 55.9377, GNorm = 0.8673, lr_0 = 6.2618e-04
Loss = 1.1229e-01, PNorm = 55.9521, GNorm = 0.6880, lr_0 = 6.2575e-04
Loss = 1.3644e-01, PNorm = 55.9684, GNorm = 0.7306, lr_0 = 6.2532e-04
Loss = 1.1353e-01, PNorm = 55.9798, GNorm = 1.0030, lr_0 = 6.2489e-04
Loss = 1.1639e-01, PNorm = 55.9937, GNorm = 0.6959, lr_0 = 6.2446e-04
Loss = 1.2726e-01, PNorm = 56.0108, GNorm = 1.4591, lr_0 = 6.2403e-04
Loss = 1.2580e-01, PNorm = 56.0262, GNorm = 1.0096, lr_0 = 6.2361e-04
Loss = 1.1125e-01, PNorm = 56.0391, GNorm = 0.5520, lr_0 = 6.2318e-04
Loss = 1.2967e-01, PNorm = 56.0498, GNorm = 1.2477, lr_0 = 6.2275e-04
Loss = 1.1115e-01, PNorm = 56.0623, GNorm = 0.6201, lr_0 = 6.2233e-04
Loss = 1.0851e-01, PNorm = 56.0754, GNorm = 0.6690, lr_0 = 6.2190e-04
Loss = 1.1831e-01, PNorm = 56.0942, GNorm = 1.6852, lr_0 = 6.2147e-04
Loss = 1.1824e-01, PNorm = 56.1158, GNorm = 1.0804, lr_0 = 6.2105e-04
Loss = 1.3652e-01, PNorm = 56.1385, GNorm = 0.5923, lr_0 = 6.2062e-04
Loss = 9.0013e-02, PNorm = 56.1554, GNorm = 0.9133, lr_0 = 6.2020e-04
Loss = 1.2240e-01, PNorm = 56.1662, GNorm = 1.2453, lr_0 = 6.1977e-04
Loss = 1.2580e-01, PNorm = 56.1830, GNorm = 0.6150, lr_0 = 6.1935e-04
Loss = 1.2184e-01, PNorm = 56.2005, GNorm = 1.3552, lr_0 = 6.1892e-04
Loss = 1.1083e-01, PNorm = 56.2114, GNorm = 0.7562, lr_0 = 6.1850e-04
Loss = 9.8203e-02, PNorm = 56.2222, GNorm = 0.5543, lr_0 = 6.1808e-04
Loss = 1.2866e-01, PNorm = 56.2386, GNorm = 1.2460, lr_0 = 6.1765e-04
Loss = 1.2527e-01, PNorm = 56.2629, GNorm = 1.0170, lr_0 = 6.1723e-04
Loss = 1.0623e-01, PNorm = 56.2831, GNorm = 1.0961, lr_0 = 6.1681e-04
Loss = 1.4011e-01, PNorm = 56.2942, GNorm = 1.0490, lr_0 = 6.1638e-04
Loss = 1.1734e-01, PNorm = 56.3080, GNorm = 0.8323, lr_0 = 6.1596e-04
Loss = 1.1702e-01, PNorm = 56.3213, GNorm = 0.6293, lr_0 = 6.1554e-04
Loss = 9.4240e-02, PNorm = 56.3354, GNorm = 1.0741, lr_0 = 6.1512e-04
Loss = 1.1746e-01, PNorm = 56.3528, GNorm = 0.8860, lr_0 = 6.1470e-04
Loss = 1.1762e-01, PNorm = 56.3675, GNorm = 0.6657, lr_0 = 6.1428e-04
Loss = 1.0720e-01, PNorm = 56.3808, GNorm = 0.8998, lr_0 = 6.1385e-04
Loss = 1.4598e-01, PNorm = 56.3992, GNorm = 1.9315, lr_0 = 6.1343e-04
Loss = 1.2490e-01, PNorm = 56.4214, GNorm = 0.6853, lr_0 = 6.1301e-04
Loss = 1.5657e-01, PNorm = 56.4490, GNorm = 0.6118, lr_0 = 6.1259e-04
Loss = 1.0869e-01, PNorm = 56.4785, GNorm = 0.9562, lr_0 = 6.1217e-04
Loss = 1.1569e-01, PNorm = 56.4999, GNorm = 0.7062, lr_0 = 6.1175e-04
Loss = 1.2243e-01, PNorm = 56.5152, GNorm = 1.0848, lr_0 = 6.1134e-04
Loss = 1.2087e-01, PNorm = 56.5322, GNorm = 0.6499, lr_0 = 6.1092e-04
Loss = 1.1566e-01, PNorm = 56.5474, GNorm = 1.3082, lr_0 = 6.1050e-04
Validation mae = 0.442240
Epoch 8
Loss = 1.0870e-01, PNorm = 56.5661, GNorm = 0.6532, lr_0 = 6.1008e-04
Loss = 9.5485e-02, PNorm = 56.5824, GNorm = 0.6447, lr_0 = 6.0966e-04
Loss = 1.1462e-01, PNorm = 56.5986, GNorm = 0.5955, lr_0 = 6.0924e-04
Loss = 1.1835e-01, PNorm = 56.6183, GNorm = 0.7532, lr_0 = 6.0883e-04
Loss = 1.3089e-01, PNorm = 56.6393, GNorm = 0.9609, lr_0 = 6.0841e-04
Loss = 1.0265e-01, PNorm = 56.6596, GNorm = 0.7108, lr_0 = 6.0799e-04
Loss = 1.0804e-01, PNorm = 56.6729, GNorm = 0.8791, lr_0 = 6.0758e-04
Loss = 8.5123e-02, PNorm = 56.6850, GNorm = 0.5431, lr_0 = 6.0716e-04
Loss = 1.0784e-01, PNorm = 56.6968, GNorm = 0.5396, lr_0 = 6.0674e-04
Loss = 9.9896e-02, PNorm = 56.7100, GNorm = 0.5275, lr_0 = 6.0633e-04
Loss = 9.7947e-02, PNorm = 56.7252, GNorm = 0.5003, lr_0 = 6.0591e-04
Loss = 1.1322e-01, PNorm = 56.7428, GNorm = 1.0684, lr_0 = 6.0550e-04
Loss = 1.2242e-01, PNorm = 56.7556, GNorm = 0.9217, lr_0 = 6.0508e-04
Loss = 1.0424e-01, PNorm = 56.7672, GNorm = 0.6791, lr_0 = 6.0467e-04
Loss = 1.0563e-01, PNorm = 56.7821, GNorm = 1.2983, lr_0 = 6.0425e-04
Loss = 1.0151e-01, PNorm = 56.7945, GNorm = 1.3496, lr_0 = 6.0384e-04
Loss = 9.8754e-02, PNorm = 56.8096, GNorm = 1.5738, lr_0 = 6.0343e-04
Loss = 1.2466e-01, PNorm = 56.8316, GNorm = 1.2846, lr_0 = 6.0301e-04
Loss = 1.3259e-01, PNorm = 56.8495, GNorm = 1.8657, lr_0 = 6.0260e-04
Loss = 1.3519e-01, PNorm = 56.8682, GNorm = 0.8470, lr_0 = 6.0219e-04
Loss = 1.1525e-01, PNorm = 56.8858, GNorm = 1.7686, lr_0 = 6.0178e-04
Loss = 1.1349e-01, PNorm = 56.8992, GNorm = 0.6246, lr_0 = 6.0136e-04
Loss = 9.9193e-02, PNorm = 56.9145, GNorm = 0.6190, lr_0 = 6.0095e-04
Loss = 1.0105e-01, PNorm = 56.9292, GNorm = 0.6910, lr_0 = 6.0054e-04
Loss = 9.7134e-02, PNorm = 56.9407, GNorm = 0.5447, lr_0 = 6.0013e-04
Loss = 1.0667e-01, PNorm = 56.9524, GNorm = 0.8060, lr_0 = 5.9972e-04
Loss = 1.0986e-01, PNorm = 56.9688, GNorm = 0.6038, lr_0 = 5.9931e-04
Loss = 1.1689e-01, PNorm = 56.9853, GNorm = 1.1307, lr_0 = 5.9890e-04
Loss = 1.0222e-01, PNorm = 56.9944, GNorm = 0.6271, lr_0 = 5.9849e-04
Loss = 1.3147e-01, PNorm = 57.0088, GNorm = 1.0966, lr_0 = 5.9808e-04
Loss = 1.0227e-01, PNorm = 57.0269, GNorm = 0.6195, lr_0 = 5.9767e-04
Loss = 9.3680e-02, PNorm = 57.0369, GNorm = 0.7460, lr_0 = 5.9726e-04
Loss = 1.2310e-01, PNorm = 57.0506, GNorm = 0.7662, lr_0 = 5.9685e-04
Loss = 1.1314e-01, PNorm = 57.0698, GNorm = 0.4741, lr_0 = 5.9644e-04
Loss = 1.0118e-01, PNorm = 57.0886, GNorm = 0.7160, lr_0 = 5.9603e-04
Loss = 1.1782e-01, PNorm = 57.1078, GNorm = 0.6052, lr_0 = 5.9562e-04
Loss = 1.2227e-01, PNorm = 57.1217, GNorm = 0.7471, lr_0 = 5.9521e-04
Loss = 1.1143e-01, PNorm = 57.1358, GNorm = 0.7621, lr_0 = 5.9481e-04
Loss = 1.1195e-01, PNorm = 57.1517, GNorm = 0.6768, lr_0 = 5.9440e-04
Loss = 1.1062e-01, PNorm = 57.1692, GNorm = 0.5961, lr_0 = 5.9399e-04
Loss = 1.0743e-01, PNorm = 57.1857, GNorm = 0.8663, lr_0 = 5.9358e-04
Loss = 1.1179e-01, PNorm = 57.2020, GNorm = 1.9435, lr_0 = 5.9318e-04
Loss = 1.0276e-01, PNorm = 57.2158, GNorm = 0.7659, lr_0 = 5.9277e-04
Loss = 1.0667e-01, PNorm = 57.2297, GNorm = 0.6689, lr_0 = 5.9236e-04
Loss = 1.0449e-01, PNorm = 57.2434, GNorm = 0.7514, lr_0 = 5.9196e-04
Loss = 9.1276e-02, PNorm = 57.2549, GNorm = 0.8171, lr_0 = 5.9155e-04
Loss = 9.3774e-02, PNorm = 57.2684, GNorm = 0.6198, lr_0 = 5.9115e-04
Loss = 9.7080e-02, PNorm = 57.2834, GNorm = 0.6149, lr_0 = 5.9074e-04
Loss = 1.1439e-01, PNorm = 57.2967, GNorm = 0.4945, lr_0 = 5.9034e-04
Loss = 9.7876e-02, PNorm = 57.3084, GNorm = 0.6085, lr_0 = 5.8993e-04
Loss = 1.0250e-01, PNorm = 57.3204, GNorm = 0.9935, lr_0 = 5.8953e-04
Loss = 1.0226e-01, PNorm = 57.3359, GNorm = 0.4945, lr_0 = 5.8913e-04
Loss = 9.6206e-02, PNorm = 57.3466, GNorm = 1.1988, lr_0 = 5.8872e-04
Loss = 1.1188e-01, PNorm = 57.3582, GNorm = 1.4281, lr_0 = 5.8832e-04
Loss = 1.0016e-01, PNorm = 57.3731, GNorm = 0.6834, lr_0 = 5.8792e-04
Loss = 1.0784e-01, PNorm = 57.3852, GNorm = 0.7207, lr_0 = 5.8751e-04
Loss = 1.0043e-01, PNorm = 57.3992, GNorm = 1.4562, lr_0 = 5.8711e-04
Loss = 1.0508e-01, PNorm = 57.4158, GNorm = 0.7908, lr_0 = 5.8671e-04
Loss = 1.1435e-01, PNorm = 57.4320, GNorm = 1.0258, lr_0 = 5.8631e-04
Loss = 1.0935e-01, PNorm = 57.4496, GNorm = 0.7351, lr_0 = 5.8591e-04
Loss = 9.7312e-02, PNorm = 57.4672, GNorm = 0.7804, lr_0 = 5.8550e-04
Loss = 9.2044e-02, PNorm = 57.4800, GNorm = 0.6381, lr_0 = 5.8510e-04
Loss = 1.1551e-01, PNorm = 57.4930, GNorm = 2.1322, lr_0 = 5.8470e-04
Loss = 1.0614e-01, PNorm = 57.5097, GNorm = 1.5520, lr_0 = 5.8430e-04
Loss = 1.2048e-01, PNorm = 57.5252, GNorm = 1.6499, lr_0 = 5.8390e-04
Loss = 1.1262e-01, PNorm = 57.5455, GNorm = 1.1023, lr_0 = 5.8350e-04
Loss = 1.2545e-01, PNorm = 57.5639, GNorm = 1.6067, lr_0 = 5.8310e-04
Loss = 1.0729e-01, PNorm = 57.5793, GNorm = 0.4581, lr_0 = 5.8270e-04
Loss = 1.0225e-01, PNorm = 57.5967, GNorm = 0.8763, lr_0 = 5.8230e-04
Loss = 1.0547e-01, PNorm = 57.6095, GNorm = 1.0473, lr_0 = 5.8190e-04
Loss = 1.0874e-01, PNorm = 57.6193, GNorm = 0.4724, lr_0 = 5.8151e-04
Loss = 1.2864e-01, PNorm = 57.6287, GNorm = 0.8297, lr_0 = 5.8111e-04
Loss = 1.0289e-01, PNorm = 57.6408, GNorm = 0.4464, lr_0 = 5.8071e-04
Loss = 1.0469e-01, PNorm = 57.6557, GNorm = 0.6040, lr_0 = 5.8031e-04
Loss = 1.0221e-01, PNorm = 57.6684, GNorm = 0.8147, lr_0 = 5.7991e-04
Loss = 1.0788e-01, PNorm = 57.6818, GNorm = 1.2018, lr_0 = 5.7952e-04
Loss = 9.8958e-02, PNorm = 57.6976, GNorm = 1.0301, lr_0 = 5.7912e-04
Loss = 1.1841e-01, PNorm = 57.7163, GNorm = 0.6799, lr_0 = 5.7872e-04
Loss = 9.9842e-02, PNorm = 57.7308, GNorm = 0.8219, lr_0 = 5.7833e-04
Loss = 9.9829e-02, PNorm = 57.7485, GNorm = 0.6418, lr_0 = 5.7793e-04
Loss = 1.0194e-01, PNorm = 57.7675, GNorm = 0.5368, lr_0 = 5.7753e-04
Loss = 9.8515e-02, PNorm = 57.7800, GNorm = 0.7119, lr_0 = 5.7714e-04
Loss = 1.0893e-01, PNorm = 57.7920, GNorm = 0.5235, lr_0 = 5.7674e-04
Loss = 9.4401e-02, PNorm = 57.8070, GNorm = 0.5107, lr_0 = 5.7635e-04
Loss = 9.9842e-02, PNorm = 57.8134, GNorm = 0.6528, lr_0 = 5.7595e-04
Loss = 1.0041e-01, PNorm = 57.8265, GNorm = 0.6679, lr_0 = 5.7556e-04
Loss = 1.3056e-01, PNorm = 57.8392, GNorm = 0.9358, lr_0 = 5.7516e-04
Loss = 9.6243e-02, PNorm = 57.8528, GNorm = 0.8331, lr_0 = 5.7477e-04
Loss = 1.0838e-01, PNorm = 57.8663, GNorm = 1.0674, lr_0 = 5.7438e-04
Loss = 1.3609e-01, PNorm = 57.8798, GNorm = 1.2149, lr_0 = 5.7398e-04
Loss = 1.0837e-01, PNorm = 57.8946, GNorm = 1.1303, lr_0 = 5.7359e-04
Loss = 1.1743e-01, PNorm = 57.9132, GNorm = 0.7425, lr_0 = 5.7320e-04
Loss = 1.2885e-01, PNorm = 57.9332, GNorm = 0.9800, lr_0 = 5.7280e-04
Loss = 1.1479e-01, PNorm = 57.9477, GNorm = 1.2878, lr_0 = 5.7241e-04
Loss = 1.0378e-01, PNorm = 57.9611, GNorm = 0.7137, lr_0 = 5.7202e-04
Loss = 9.6437e-02, PNorm = 57.9762, GNorm = 0.7172, lr_0 = 5.7163e-04
Loss = 1.2282e-01, PNorm = 57.9916, GNorm = 0.5800, lr_0 = 5.7124e-04
Loss = 1.3143e-01, PNorm = 58.0112, GNorm = 0.6511, lr_0 = 5.7084e-04
Loss = 9.0192e-02, PNorm = 58.0283, GNorm = 0.5650, lr_0 = 5.7045e-04
Loss = 9.3706e-02, PNorm = 58.0448, GNorm = 0.5617, lr_0 = 5.7006e-04
Loss = 1.0648e-01, PNorm = 58.0555, GNorm = 0.7859, lr_0 = 5.6967e-04
Loss = 1.2715e-01, PNorm = 58.0710, GNorm = 1.1772, lr_0 = 5.6928e-04
Loss = 1.0835e-01, PNorm = 58.0874, GNorm = 1.1160, lr_0 = 5.6889e-04
Loss = 1.2286e-01, PNorm = 58.1007, GNorm = 0.5746, lr_0 = 5.6850e-04
Loss = 1.1335e-01, PNorm = 58.1181, GNorm = 0.7342, lr_0 = 5.6811e-04
Loss = 1.0738e-01, PNorm = 58.1256, GNorm = 0.9268, lr_0 = 5.6772e-04
Loss = 1.3085e-01, PNorm = 58.1406, GNorm = 0.4536, lr_0 = 5.6733e-04
Loss = 1.0707e-01, PNorm = 58.1594, GNorm = 0.5533, lr_0 = 5.6695e-04
Loss = 9.3285e-02, PNorm = 58.1727, GNorm = 0.5731, lr_0 = 5.6656e-04
Loss = 1.1373e-01, PNorm = 58.1828, GNorm = 0.6742, lr_0 = 5.6617e-04
Loss = 1.0002e-01, PNorm = 58.1968, GNorm = 0.7052, lr_0 = 5.6578e-04
Loss = 1.0122e-01, PNorm = 58.2104, GNorm = 0.6741, lr_0 = 5.6539e-04
Loss = 8.9855e-02, PNorm = 58.2225, GNorm = 0.7596, lr_0 = 5.6501e-04
Loss = 9.6869e-02, PNorm = 58.2309, GNorm = 0.4687, lr_0 = 5.6462e-04
Loss = 1.0701e-01, PNorm = 58.2402, GNorm = 0.7322, lr_0 = 5.6423e-04
Loss = 1.1489e-01, PNorm = 58.2580, GNorm = 0.7857, lr_0 = 5.6385e-04
Loss = 1.0799e-01, PNorm = 58.2738, GNorm = 0.6292, lr_0 = 5.6346e-04
Loss = 1.1825e-01, PNorm = 58.2882, GNorm = 0.9600, lr_0 = 5.6307e-04
Loss = 1.1779e-01, PNorm = 58.3011, GNorm = 1.0286, lr_0 = 5.6269e-04
Loss = 1.1707e-01, PNorm = 58.3177, GNorm = 0.4921, lr_0 = 5.6230e-04
Validation mae = 0.406664
Epoch 9
Loss = 9.2009e-02, PNorm = 58.3352, GNorm = 0.4342, lr_0 = 5.6192e-04
Loss = 8.9814e-02, PNorm = 58.3479, GNorm = 0.5675, lr_0 = 5.6153e-04
Loss = 8.2509e-02, PNorm = 58.3613, GNorm = 0.5929, lr_0 = 5.6115e-04
Loss = 9.6064e-02, PNorm = 58.3720, GNorm = 0.4771, lr_0 = 5.6076e-04
Loss = 8.9997e-02, PNorm = 58.3791, GNorm = 0.6661, lr_0 = 5.6038e-04
Loss = 9.5888e-02, PNorm = 58.3866, GNorm = 1.1909, lr_0 = 5.6000e-04
Loss = 1.1696e-01, PNorm = 58.4000, GNorm = 1.2707, lr_0 = 5.5961e-04
Loss = 1.0539e-01, PNorm = 58.4167, GNorm = 1.6168, lr_0 = 5.5923e-04
Loss = 9.6597e-02, PNorm = 58.4323, GNorm = 0.7976, lr_0 = 5.5885e-04
Loss = 9.7881e-02, PNorm = 58.4535, GNorm = 0.5133, lr_0 = 5.5846e-04
Loss = 9.5619e-02, PNorm = 58.4724, GNorm = 0.8066, lr_0 = 5.5808e-04
Loss = 1.0056e-01, PNorm = 58.4881, GNorm = 0.5737, lr_0 = 5.5770e-04
Loss = 1.0517e-01, PNorm = 58.5043, GNorm = 0.6554, lr_0 = 5.5732e-04
Loss = 8.7692e-02, PNorm = 58.5138, GNorm = 1.1230, lr_0 = 5.5693e-04
Loss = 1.1386e-01, PNorm = 58.5264, GNorm = 0.5707, lr_0 = 5.5655e-04
Loss = 9.5267e-02, PNorm = 58.5368, GNorm = 0.7933, lr_0 = 5.5617e-04
Loss = 8.9990e-02, PNorm = 58.5494, GNorm = 0.8522, lr_0 = 5.5579e-04
Loss = 9.6075e-02, PNorm = 58.5645, GNorm = 0.6910, lr_0 = 5.5541e-04
Loss = 1.0941e-01, PNorm = 58.5811, GNorm = 0.7926, lr_0 = 5.5503e-04
Loss = 9.6277e-02, PNorm = 58.5992, GNorm = 0.6919, lr_0 = 5.5465e-04
Loss = 1.0936e-01, PNorm = 58.6187, GNorm = 0.7083, lr_0 = 5.5427e-04
Loss = 1.0268e-01, PNorm = 58.6328, GNorm = 0.5606, lr_0 = 5.5389e-04
Loss = 9.9347e-02, PNorm = 58.6429, GNorm = 0.8257, lr_0 = 5.5351e-04
Loss = 9.2231e-02, PNorm = 58.6583, GNorm = 0.9146, lr_0 = 5.5313e-04
Loss = 9.2578e-02, PNorm = 58.6733, GNorm = 0.6423, lr_0 = 5.5275e-04
Loss = 9.0899e-02, PNorm = 58.6847, GNorm = 0.8931, lr_0 = 5.5237e-04
Loss = 9.4217e-02, PNorm = 58.6979, GNorm = 0.7869, lr_0 = 5.5199e-04
Loss = 1.1558e-01, PNorm = 58.7102, GNorm = 0.5207, lr_0 = 5.5162e-04
Loss = 1.0743e-01, PNorm = 58.7213, GNorm = 0.7541, lr_0 = 5.5124e-04
Loss = 9.5403e-02, PNorm = 58.7369, GNorm = 0.9203, lr_0 = 5.5086e-04
Loss = 9.7631e-02, PNorm = 58.7455, GNorm = 0.6467, lr_0 = 5.5048e-04
Loss = 9.8526e-02, PNorm = 58.7548, GNorm = 0.6711, lr_0 = 5.5011e-04
Loss = 1.0859e-01, PNorm = 58.7699, GNorm = 0.9911, lr_0 = 5.4973e-04
Loss = 1.0481e-01, PNorm = 58.7867, GNorm = 0.7886, lr_0 = 5.4935e-04
Loss = 9.1332e-02, PNorm = 58.8056, GNorm = 0.8102, lr_0 = 5.4898e-04
Loss = 1.0279e-01, PNorm = 58.8190, GNorm = 0.7316, lr_0 = 5.4860e-04
Loss = 9.2400e-02, PNorm = 58.8269, GNorm = 0.9030, lr_0 = 5.4822e-04
Loss = 9.6758e-02, PNorm = 58.8390, GNorm = 0.5784, lr_0 = 5.4785e-04
Loss = 9.7580e-02, PNorm = 58.8504, GNorm = 0.6958, lr_0 = 5.4747e-04
Loss = 9.8973e-02, PNorm = 58.8675, GNorm = 0.5401, lr_0 = 5.4710e-04
Loss = 1.1882e-01, PNorm = 58.8872, GNorm = 0.8777, lr_0 = 5.4672e-04
Loss = 1.2583e-01, PNorm = 58.9009, GNorm = 2.0545, lr_0 = 5.4635e-04
Loss = 1.1267e-01, PNorm = 58.9186, GNorm = 2.2245, lr_0 = 5.4597e-04
Loss = 1.1991e-01, PNorm = 58.9346, GNorm = 1.1374, lr_0 = 5.4560e-04
Loss = 1.1000e-01, PNorm = 58.9507, GNorm = 1.0162, lr_0 = 5.4523e-04
Loss = 9.3372e-02, PNorm = 58.9617, GNorm = 0.4677, lr_0 = 5.4485e-04
Loss = 9.3193e-02, PNorm = 58.9762, GNorm = 1.2861, lr_0 = 5.4448e-04
Loss = 1.0081e-01, PNorm = 58.9896, GNorm = 0.9029, lr_0 = 5.4411e-04
Loss = 1.1900e-01, PNorm = 59.0027, GNorm = 0.9682, lr_0 = 5.4373e-04
Loss = 9.5746e-02, PNorm = 59.0140, GNorm = 0.5982, lr_0 = 5.4336e-04
Loss = 1.1090e-01, PNorm = 59.0232, GNorm = 0.8625, lr_0 = 5.4299e-04
Loss = 1.1091e-01, PNorm = 59.0367, GNorm = 0.7643, lr_0 = 5.4262e-04
Loss = 9.9248e-02, PNorm = 59.0574, GNorm = 0.6801, lr_0 = 5.4225e-04
Loss = 1.0018e-01, PNorm = 59.0766, GNorm = 0.8883, lr_0 = 5.4187e-04
Loss = 1.0820e-01, PNorm = 59.0891, GNorm = 0.9049, lr_0 = 5.4150e-04
Loss = 1.2054e-01, PNorm = 59.1024, GNorm = 0.5849, lr_0 = 5.4113e-04
Loss = 1.1316e-01, PNorm = 59.1190, GNorm = 1.0449, lr_0 = 5.4076e-04
Loss = 1.1400e-01, PNorm = 59.1375, GNorm = 0.7115, lr_0 = 5.4039e-04
Loss = 1.0652e-01, PNorm = 59.1534, GNorm = 0.7885, lr_0 = 5.4002e-04
Loss = 8.2776e-02, PNorm = 59.1683, GNorm = 0.4316, lr_0 = 5.3965e-04
Loss = 1.1608e-01, PNorm = 59.1766, GNorm = 0.7251, lr_0 = 5.3928e-04
Loss = 1.0044e-01, PNorm = 59.1861, GNorm = 0.5735, lr_0 = 5.3891e-04
Loss = 9.5462e-02, PNorm = 59.1973, GNorm = 0.5185, lr_0 = 5.3854e-04
Loss = 9.3160e-02, PNorm = 59.2088, GNorm = 0.6139, lr_0 = 5.3817e-04
Loss = 9.6027e-02, PNorm = 59.2234, GNorm = 0.7615, lr_0 = 5.3781e-04
Loss = 9.5349e-02, PNorm = 59.2352, GNorm = 1.3409, lr_0 = 5.3744e-04
Loss = 9.3908e-02, PNorm = 59.2458, GNorm = 0.8686, lr_0 = 5.3707e-04
Loss = 1.0259e-01, PNorm = 59.2587, GNorm = 1.4328, lr_0 = 5.3670e-04
Loss = 1.0076e-01, PNorm = 59.2770, GNorm = 1.2026, lr_0 = 5.3633e-04
Loss = 1.1297e-01, PNorm = 59.2980, GNorm = 0.6697, lr_0 = 5.3597e-04
Loss = 9.7312e-02, PNorm = 59.3138, GNorm = 1.3722, lr_0 = 5.3560e-04
Loss = 9.1749e-02, PNorm = 59.3250, GNorm = 0.8721, lr_0 = 5.3523e-04
Loss = 1.4129e-01, PNorm = 59.3407, GNorm = 0.9840, lr_0 = 5.3486e-04
Loss = 1.1303e-01, PNorm = 59.3589, GNorm = 0.8416, lr_0 = 5.3450e-04
Loss = 1.0647e-01, PNorm = 59.3738, GNorm = 0.7601, lr_0 = 5.3413e-04
Loss = 9.6531e-02, PNorm = 59.3828, GNorm = 0.8216, lr_0 = 5.3377e-04
Loss = 9.7880e-02, PNorm = 59.3947, GNorm = 0.8560, lr_0 = 5.3340e-04
Loss = 9.8000e-02, PNorm = 59.4089, GNorm = 0.6992, lr_0 = 5.3304e-04
Loss = 1.0421e-01, PNorm = 59.4205, GNorm = 1.2532, lr_0 = 5.3267e-04
Loss = 8.9569e-02, PNorm = 59.4317, GNorm = 0.5981, lr_0 = 5.3231e-04
Loss = 9.6164e-02, PNorm = 59.4460, GNorm = 0.6343, lr_0 = 5.3194e-04
Loss = 9.9695e-02, PNorm = 59.4564, GNorm = 1.3683, lr_0 = 5.3158e-04
Loss = 8.7697e-02, PNorm = 59.4632, GNorm = 0.5406, lr_0 = 5.3121e-04
Loss = 1.0080e-01, PNorm = 59.4727, GNorm = 0.7303, lr_0 = 5.3085e-04
Loss = 1.1363e-01, PNorm = 59.4867, GNorm = 0.7347, lr_0 = 5.3048e-04
Loss = 8.6984e-02, PNorm = 59.4962, GNorm = 0.5178, lr_0 = 5.3012e-04
Loss = 9.9410e-02, PNorm = 59.5065, GNorm = 0.9974, lr_0 = 5.2976e-04
Loss = 1.0363e-01, PNorm = 59.5225, GNorm = 1.0796, lr_0 = 5.2939e-04
Loss = 1.0836e-01, PNorm = 59.5354, GNorm = 0.7947, lr_0 = 5.2903e-04
Loss = 9.1537e-02, PNorm = 59.5472, GNorm = 0.7494, lr_0 = 5.2867e-04
Loss = 8.6762e-02, PNorm = 59.5604, GNorm = 0.7519, lr_0 = 5.2831e-04
Loss = 9.0733e-02, PNorm = 59.5702, GNorm = 1.2932, lr_0 = 5.2795e-04
Loss = 1.0318e-01, PNorm = 59.5799, GNorm = 0.6370, lr_0 = 5.2758e-04
Loss = 8.6251e-02, PNorm = 59.5919, GNorm = 0.6771, lr_0 = 5.2722e-04
Loss = 9.0846e-02, PNorm = 59.6006, GNorm = 0.5780, lr_0 = 5.2686e-04
Loss = 9.5032e-02, PNorm = 59.6128, GNorm = 0.5848, lr_0 = 5.2650e-04
Loss = 1.0919e-01, PNorm = 59.6195, GNorm = 0.8845, lr_0 = 5.2614e-04
Loss = 1.0018e-01, PNorm = 59.6283, GNorm = 0.5466, lr_0 = 5.2578e-04
Loss = 8.7878e-02, PNorm = 59.6417, GNorm = 0.4680, lr_0 = 5.2542e-04
Loss = 1.0116e-01, PNorm = 59.6561, GNorm = 0.7567, lr_0 = 5.2506e-04
Loss = 1.0594e-01, PNorm = 59.6734, GNorm = 0.8222, lr_0 = 5.2470e-04
Loss = 1.0178e-01, PNorm = 59.6924, GNorm = 0.7329, lr_0 = 5.2434e-04
Loss = 1.1175e-01, PNorm = 59.7102, GNorm = 0.6232, lr_0 = 5.2398e-04
Loss = 9.7793e-02, PNorm = 59.7246, GNorm = 0.8942, lr_0 = 5.2362e-04
Loss = 1.1042e-01, PNorm = 59.7411, GNorm = 0.9770, lr_0 = 5.2326e-04
Loss = 9.6316e-02, PNorm = 59.7558, GNorm = 1.5785, lr_0 = 5.2290e-04
Loss = 9.2510e-02, PNorm = 59.7690, GNorm = 0.8184, lr_0 = 5.2255e-04
Loss = 1.0532e-01, PNorm = 59.7794, GNorm = 0.7572, lr_0 = 5.2219e-04
Loss = 1.0345e-01, PNorm = 59.7902, GNorm = 0.5614, lr_0 = 5.2183e-04
Loss = 1.0815e-01, PNorm = 59.7997, GNorm = 0.8861, lr_0 = 5.2147e-04
Loss = 1.0016e-01, PNorm = 59.8077, GNorm = 0.6454, lr_0 = 5.2112e-04
Loss = 1.0850e-01, PNorm = 59.8207, GNorm = 0.5304, lr_0 = 5.2076e-04
Loss = 1.2304e-01, PNorm = 59.8357, GNorm = 0.5450, lr_0 = 5.2040e-04
Loss = 1.1379e-01, PNorm = 59.8484, GNorm = 0.7667, lr_0 = 5.2005e-04
Loss = 1.1003e-01, PNorm = 59.8612, GNorm = 0.7817, lr_0 = 5.1969e-04
Loss = 1.2370e-01, PNorm = 59.8748, GNorm = 1.2351, lr_0 = 5.1933e-04
Loss = 8.7709e-02, PNorm = 59.8906, GNorm = 0.9064, lr_0 = 5.1898e-04
Loss = 9.9990e-02, PNorm = 59.9030, GNorm = 0.8269, lr_0 = 5.1862e-04
Loss = 1.0828e-01, PNorm = 59.9152, GNorm = 0.5826, lr_0 = 5.1827e-04
Loss = 1.1282e-01, PNorm = 59.9283, GNorm = 0.5385, lr_0 = 5.1791e-04
Validation mae = 0.404913
Epoch 10
Loss = 9.1578e-02, PNorm = 59.9379, GNorm = 0.5593, lr_0 = 5.1756e-04
Loss = 9.7455e-02, PNorm = 59.9477, GNorm = 0.8876, lr_0 = 5.1720e-04
Loss = 7.8489e-02, PNorm = 59.9606, GNorm = 0.5021, lr_0 = 5.1685e-04
Loss = 8.7356e-02, PNorm = 59.9706, GNorm = 0.8395, lr_0 = 5.1649e-04
Loss = 9.5932e-02, PNorm = 59.9800, GNorm = 0.5305, lr_0 = 5.1614e-04
Loss = 1.0051e-01, PNorm = 59.9979, GNorm = 0.8444, lr_0 = 5.1579e-04
Loss = 9.2291e-02, PNorm = 60.0097, GNorm = 0.7947, lr_0 = 5.1543e-04
Loss = 8.5262e-02, PNorm = 60.0226, GNorm = 0.7400, lr_0 = 5.1508e-04
Loss = 7.3571e-02, PNorm = 60.0376, GNorm = 0.5599, lr_0 = 5.1473e-04
Loss = 1.0485e-01, PNorm = 60.0542, GNorm = 0.8224, lr_0 = 5.1437e-04
Loss = 8.8105e-02, PNorm = 60.0709, GNorm = 0.8215, lr_0 = 5.1402e-04
Loss = 7.3686e-02, PNorm = 60.0846, GNorm = 0.4366, lr_0 = 5.1367e-04
Loss = 9.0932e-02, PNorm = 60.0995, GNorm = 0.6283, lr_0 = 5.1332e-04
Loss = 8.6751e-02, PNorm = 60.1148, GNorm = 0.7827, lr_0 = 5.1297e-04
Loss = 8.8760e-02, PNorm = 60.1272, GNorm = 0.7407, lr_0 = 5.1262e-04
Loss = 8.9887e-02, PNorm = 60.1371, GNorm = 0.4276, lr_0 = 5.1226e-04
Loss = 9.0792e-02, PNorm = 60.1472, GNorm = 0.6539, lr_0 = 5.1191e-04
Loss = 8.7469e-02, PNorm = 60.1567, GNorm = 0.5886, lr_0 = 5.1156e-04
Loss = 9.6319e-02, PNorm = 60.1674, GNorm = 0.6093, lr_0 = 5.1121e-04
Loss = 8.2773e-02, PNorm = 60.1790, GNorm = 0.9339, lr_0 = 5.1086e-04
Loss = 7.9280e-02, PNorm = 60.1915, GNorm = 0.4491, lr_0 = 5.1051e-04
Loss = 9.7235e-02, PNorm = 60.2069, GNorm = 1.0032, lr_0 = 5.1016e-04
Loss = 8.8566e-02, PNorm = 60.2190, GNorm = 0.5658, lr_0 = 5.0981e-04
Loss = 8.1401e-02, PNorm = 60.2297, GNorm = 0.5280, lr_0 = 5.0946e-04
Loss = 8.4494e-02, PNorm = 60.2417, GNorm = 0.6447, lr_0 = 5.0911e-04
Loss = 8.0912e-02, PNorm = 60.2523, GNorm = 0.5271, lr_0 = 5.0877e-04
Loss = 1.0329e-01, PNorm = 60.2651, GNorm = 0.6118, lr_0 = 5.0842e-04
Loss = 8.3955e-02, PNorm = 60.2732, GNorm = 0.8325, lr_0 = 5.0807e-04
Loss = 7.9201e-02, PNorm = 60.2875, GNorm = 0.5771, lr_0 = 5.0772e-04
Loss = 9.0276e-02, PNorm = 60.3007, GNorm = 0.7984, lr_0 = 5.0737e-04
Loss = 8.5861e-02, PNorm = 60.3144, GNorm = 0.6541, lr_0 = 5.0703e-04
Loss = 8.0919e-02, PNorm = 60.3267, GNorm = 0.6891, lr_0 = 5.0668e-04
Loss = 7.2149e-02, PNorm = 60.3398, GNorm = 0.6812, lr_0 = 5.0633e-04
Loss = 8.2726e-02, PNorm = 60.3514, GNorm = 1.3768, lr_0 = 5.0598e-04
Loss = 1.0094e-01, PNorm = 60.3659, GNorm = 0.6570, lr_0 = 5.0564e-04
Loss = 8.2635e-02, PNorm = 60.3813, GNorm = 0.4875, lr_0 = 5.0529e-04
Loss = 9.5987e-02, PNorm = 60.3952, GNorm = 1.1089, lr_0 = 5.0494e-04
Loss = 8.5718e-02, PNorm = 60.4085, GNorm = 0.5237, lr_0 = 5.0460e-04
Loss = 9.2243e-02, PNorm = 60.4194, GNorm = 1.1993, lr_0 = 5.0425e-04
Loss = 1.0275e-01, PNorm = 60.4285, GNorm = 0.6825, lr_0 = 5.0391e-04
Loss = 8.8994e-02, PNorm = 60.4412, GNorm = 0.6273, lr_0 = 5.0356e-04
Loss = 8.0943e-02, PNorm = 60.4503, GNorm = 0.5022, lr_0 = 5.0322e-04
Loss = 9.9696e-02, PNorm = 60.4584, GNorm = 0.6447, lr_0 = 5.0287e-04
Loss = 8.9894e-02, PNorm = 60.4645, GNorm = 0.9093, lr_0 = 5.0253e-04
Loss = 1.0003e-01, PNorm = 60.4735, GNorm = 0.7196, lr_0 = 5.0218e-04
Loss = 9.1572e-02, PNorm = 60.4880, GNorm = 0.5606, lr_0 = 5.0184e-04
Loss = 9.3712e-02, PNorm = 60.5032, GNorm = 0.9335, lr_0 = 5.0150e-04
Loss = 1.1001e-01, PNorm = 60.5144, GNorm = 0.9527, lr_0 = 5.0115e-04
Loss = 9.2076e-02, PNorm = 60.5227, GNorm = 0.5125, lr_0 = 5.0081e-04
Loss = 8.9540e-02, PNorm = 60.5274, GNorm = 0.6906, lr_0 = 5.0047e-04
Loss = 9.2386e-02, PNorm = 60.5401, GNorm = 0.8300, lr_0 = 5.0012e-04
Loss = 7.9165e-02, PNorm = 60.5529, GNorm = 0.5663, lr_0 = 4.9978e-04
Loss = 8.0075e-02, PNorm = 60.5674, GNorm = 0.5558, lr_0 = 4.9944e-04
Loss = 8.9945e-02, PNorm = 60.5788, GNorm = 0.4611, lr_0 = 4.9910e-04
Loss = 9.5034e-02, PNorm = 60.5940, GNorm = 0.5203, lr_0 = 4.9875e-04
Loss = 8.6325e-02, PNorm = 60.6073, GNorm = 0.6062, lr_0 = 4.9841e-04
Loss = 8.8783e-02, PNorm = 60.6166, GNorm = 0.5562, lr_0 = 4.9807e-04
Loss = 1.0294e-01, PNorm = 60.6291, GNorm = 0.5944, lr_0 = 4.9773e-04
Loss = 1.0679e-01, PNorm = 60.6435, GNorm = 0.5744, lr_0 = 4.9739e-04
Loss = 8.0885e-02, PNorm = 60.6559, GNorm = 0.6527, lr_0 = 4.9705e-04
Loss = 9.2119e-02, PNorm = 60.6685, GNorm = 0.6267, lr_0 = 4.9671e-04
Loss = 9.4863e-02, PNorm = 60.6805, GNorm = 0.5757, lr_0 = 4.9637e-04
Loss = 9.9790e-02, PNorm = 60.6933, GNorm = 0.5895, lr_0 = 4.9603e-04
Loss = 8.9525e-02, PNorm = 60.7028, GNorm = 0.6614, lr_0 = 4.9569e-04
Loss = 9.3181e-02, PNorm = 60.7132, GNorm = 1.0680, lr_0 = 4.9535e-04
Loss = 9.3964e-02, PNorm = 60.7273, GNorm = 0.9997, lr_0 = 4.9501e-04
Loss = 9.7767e-02, PNorm = 60.7308, GNorm = 0.6114, lr_0 = 4.9467e-04
Loss = 8.9146e-02, PNorm = 60.7367, GNorm = 0.4928, lr_0 = 4.9433e-04
Loss = 1.0298e-01, PNorm = 60.7475, GNorm = 1.0522, lr_0 = 4.9399e-04
Loss = 1.1282e-01, PNorm = 60.7702, GNorm = 1.5496, lr_0 = 4.9365e-04
Loss = 9.2396e-02, PNorm = 60.7817, GNorm = 1.2754, lr_0 = 4.9332e-04
Loss = 9.4668e-02, PNorm = 60.7932, GNorm = 0.5460, lr_0 = 4.9298e-04
Loss = 1.0115e-01, PNorm = 60.8069, GNorm = 0.7308, lr_0 = 4.9264e-04
Loss = 9.1091e-02, PNorm = 60.8198, GNorm = 0.5353, lr_0 = 4.9230e-04
Loss = 1.0587e-01, PNorm = 60.8321, GNorm = 0.5813, lr_0 = 4.9197e-04
Loss = 8.6573e-02, PNorm = 60.8441, GNorm = 0.6332, lr_0 = 4.9163e-04
Loss = 8.4817e-02, PNorm = 60.8573, GNorm = 0.9366, lr_0 = 4.9129e-04
Loss = 1.1158e-01, PNorm = 60.8744, GNorm = 1.1730, lr_0 = 4.9095e-04
Loss = 1.0635e-01, PNorm = 60.8877, GNorm = 1.3878, lr_0 = 4.9062e-04
Loss = 1.0478e-01, PNorm = 60.9034, GNorm = 1.4813, lr_0 = 4.9028e-04
Loss = 1.1956e-01, PNorm = 60.9192, GNorm = 1.5432, lr_0 = 4.8995e-04
Loss = 1.0796e-01, PNorm = 60.9369, GNorm = 1.1379, lr_0 = 4.8961e-04
Loss = 8.8379e-02, PNorm = 60.9480, GNorm = 1.2457, lr_0 = 4.8928e-04
Loss = 1.1085e-01, PNorm = 60.9591, GNorm = 0.9019, lr_0 = 4.8894e-04
Loss = 9.3834e-02, PNorm = 60.9744, GNorm = 0.6831, lr_0 = 4.8861e-04
Loss = 8.5192e-02, PNorm = 60.9838, GNorm = 0.8068, lr_0 = 4.8827e-04
Loss = 9.8898e-02, PNorm = 60.9898, GNorm = 0.7866, lr_0 = 4.8794e-04
Loss = 9.1206e-02, PNorm = 60.9975, GNorm = 0.6594, lr_0 = 4.8760e-04
Loss = 8.9784e-02, PNorm = 61.0064, GNorm = 0.9420, lr_0 = 4.8727e-04
Loss = 9.1782e-02, PNorm = 61.0151, GNorm = 0.4392, lr_0 = 4.8693e-04
Loss = 1.0133e-01, PNorm = 61.0275, GNorm = 0.9201, lr_0 = 4.8660e-04
Loss = 9.3764e-02, PNorm = 61.0420, GNorm = 0.5957, lr_0 = 4.8627e-04
Loss = 1.0655e-01, PNorm = 61.0542, GNorm = 1.8376, lr_0 = 4.8593e-04
Loss = 9.5015e-02, PNorm = 61.0662, GNorm = 0.5219, lr_0 = 4.8560e-04
Loss = 1.1726e-01, PNorm = 61.0780, GNorm = 0.6436, lr_0 = 4.8527e-04
Loss = 1.0413e-01, PNorm = 61.0931, GNorm = 0.8842, lr_0 = 4.8494e-04
Loss = 9.0674e-02, PNorm = 61.1031, GNorm = 0.7989, lr_0 = 4.8460e-04
Loss = 9.3183e-02, PNorm = 61.1134, GNorm = 0.8011, lr_0 = 4.8427e-04
Loss = 9.2969e-02, PNorm = 61.1235, GNorm = 0.7828, lr_0 = 4.8394e-04
Loss = 8.7490e-02, PNorm = 61.1319, GNorm = 0.8020, lr_0 = 4.8361e-04
Loss = 9.2278e-02, PNorm = 61.1395, GNorm = 0.6484, lr_0 = 4.8328e-04
Loss = 9.2646e-02, PNorm = 61.1452, GNorm = 0.5323, lr_0 = 4.8295e-04
Loss = 9.9094e-02, PNorm = 61.1527, GNorm = 0.6702, lr_0 = 4.8262e-04
Loss = 8.6428e-02, PNorm = 61.1665, GNorm = 0.7227, lr_0 = 4.8228e-04
Loss = 1.0570e-01, PNorm = 61.1761, GNorm = 0.5674, lr_0 = 4.8195e-04
Loss = 9.1960e-02, PNorm = 61.1850, GNorm = 0.5361, lr_0 = 4.8162e-04
Loss = 9.2725e-02, PNorm = 61.1920, GNorm = 0.6936, lr_0 = 4.8129e-04
Loss = 8.7546e-02, PNorm = 61.1989, GNorm = 0.7828, lr_0 = 4.8096e-04
Loss = 9.9883e-02, PNorm = 61.2079, GNorm = 0.5859, lr_0 = 4.8064e-04
Loss = 8.8123e-02, PNorm = 61.2185, GNorm = 0.6098, lr_0 = 4.8031e-04
Loss = 1.1192e-01, PNorm = 61.2293, GNorm = 0.5417, lr_0 = 4.7998e-04
Loss = 8.8818e-02, PNorm = 61.2431, GNorm = 0.6031, lr_0 = 4.7965e-04
Loss = 9.1441e-02, PNorm = 61.2538, GNorm = 0.6697, lr_0 = 4.7932e-04
Loss = 8.4060e-02, PNorm = 61.2630, GNorm = 0.5468, lr_0 = 4.7899e-04
Loss = 9.4687e-02, PNorm = 61.2723, GNorm = 0.5370, lr_0 = 4.7866e-04
Loss = 9.8363e-02, PNorm = 61.2811, GNorm = 0.7056, lr_0 = 4.7833e-04
Loss = 1.0256e-01, PNorm = 61.2912, GNorm = 0.8922, lr_0 = 4.7801e-04
Loss = 9.5145e-02, PNorm = 61.3067, GNorm = 1.0726, lr_0 = 4.7768e-04
Loss = 9.4873e-02, PNorm = 61.3210, GNorm = 0.7612, lr_0 = 4.7735e-04
Loss = 1.0681e-01, PNorm = 61.3340, GNorm = 1.3421, lr_0 = 4.7703e-04
Validation mae = 0.417585
Epoch 11
Loss = 7.3738e-02, PNorm = 61.3429, GNorm = 0.6331, lr_0 = 4.7670e-04
Loss = 7.9135e-02, PNorm = 61.3528, GNorm = 0.4871, lr_0 = 4.7637e-04
Loss = 7.3563e-02, PNorm = 61.3618, GNorm = 0.5437, lr_0 = 4.7605e-04
Loss = 7.9362e-02, PNorm = 61.3725, GNorm = 0.4608, lr_0 = 4.7572e-04
Loss = 8.6309e-02, PNorm = 61.3888, GNorm = 0.7392, lr_0 = 4.7539e-04
Loss = 8.6028e-02, PNorm = 61.4048, GNorm = 0.5716, lr_0 = 4.7507e-04
Loss = 7.3231e-02, PNorm = 61.4128, GNorm = 0.3373, lr_0 = 4.7474e-04
Loss = 8.0665e-02, PNorm = 61.4187, GNorm = 0.4800, lr_0 = 4.7442e-04
Loss = 8.0802e-02, PNorm = 61.4285, GNorm = 0.5774, lr_0 = 4.7409e-04
Loss = 8.7477e-02, PNorm = 61.4372, GNorm = 0.7044, lr_0 = 4.7377e-04
Loss = 8.1446e-02, PNorm = 61.4478, GNorm = 0.8264, lr_0 = 4.7344e-04
Loss = 7.6615e-02, PNorm = 61.4611, GNorm = 0.7335, lr_0 = 4.7312e-04
Loss = 7.6578e-02, PNorm = 61.4738, GNorm = 0.7940, lr_0 = 4.7279e-04
Loss = 7.8794e-02, PNorm = 61.4825, GNorm = 0.6854, lr_0 = 4.7247e-04
Loss = 9.0951e-02, PNorm = 61.4939, GNorm = 0.6024, lr_0 = 4.7215e-04
Loss = 8.4254e-02, PNorm = 61.5037, GNorm = 0.7742, lr_0 = 4.7182e-04
Loss = 7.6865e-02, PNorm = 61.5116, GNorm = 0.5812, lr_0 = 4.7150e-04
Loss = 8.6067e-02, PNorm = 61.5219, GNorm = 0.7718, lr_0 = 4.7118e-04
Loss = 7.6579e-02, PNorm = 61.5377, GNorm = 0.5083, lr_0 = 4.7085e-04
Loss = 8.3912e-02, PNorm = 61.5523, GNorm = 0.5328, lr_0 = 4.7053e-04
Loss = 7.5913e-02, PNorm = 61.5646, GNorm = 0.5763, lr_0 = 4.7021e-04
Loss = 8.9105e-02, PNorm = 61.5740, GNorm = 0.5902, lr_0 = 4.6989e-04
Loss = 8.7161e-02, PNorm = 61.5833, GNorm = 0.7396, lr_0 = 4.6957e-04
Loss = 7.8963e-02, PNorm = 61.5952, GNorm = 0.5801, lr_0 = 4.6924e-04
Loss = 9.0925e-02, PNorm = 61.6088, GNorm = 0.5848, lr_0 = 4.6892e-04
Loss = 7.8951e-02, PNorm = 61.6204, GNorm = 0.5908, lr_0 = 4.6860e-04
Loss = 8.3528e-02, PNorm = 61.6282, GNorm = 0.8440, lr_0 = 4.6828e-04
Loss = 7.3941e-02, PNorm = 61.6358, GNorm = 0.7354, lr_0 = 4.6796e-04
Loss = 8.4405e-02, PNorm = 61.6444, GNorm = 0.4054, lr_0 = 4.6764e-04
Loss = 8.4403e-02, PNorm = 61.6544, GNorm = 0.5439, lr_0 = 4.6732e-04
Loss = 8.5459e-02, PNorm = 61.6647, GNorm = 0.4846, lr_0 = 4.6700e-04
Loss = 8.2084e-02, PNorm = 61.6787, GNorm = 0.4802, lr_0 = 4.6668e-04
Loss = 9.5849e-02, PNorm = 61.6923, GNorm = 0.7300, lr_0 = 4.6636e-04
Loss = 8.9556e-02, PNorm = 61.7044, GNorm = 0.7827, lr_0 = 4.6604e-04
Loss = 8.9031e-02, PNorm = 61.7147, GNorm = 0.7649, lr_0 = 4.6572e-04
Loss = 8.3509e-02, PNorm = 61.7277, GNorm = 0.7617, lr_0 = 4.6540e-04
Loss = 9.1774e-02, PNorm = 61.7378, GNorm = 0.8030, lr_0 = 4.6508e-04
Loss = 8.5453e-02, PNorm = 61.7482, GNorm = 1.5418, lr_0 = 4.6476e-04
Loss = 9.4095e-02, PNorm = 61.7638, GNorm = 0.5768, lr_0 = 4.6445e-04
Loss = 7.7759e-02, PNorm = 61.7745, GNorm = 0.5796, lr_0 = 4.6413e-04
Loss = 8.4865e-02, PNorm = 61.7820, GNorm = 0.4368, lr_0 = 4.6381e-04
Loss = 8.3484e-02, PNorm = 61.7941, GNorm = 0.7703, lr_0 = 4.6349e-04
Loss = 7.7219e-02, PNorm = 61.8003, GNorm = 0.8747, lr_0 = 4.6317e-04
Loss = 8.7969e-02, PNorm = 61.8119, GNorm = 0.6191, lr_0 = 4.6286e-04
Loss = 9.4486e-02, PNorm = 61.8271, GNorm = 0.9619, lr_0 = 4.6254e-04
Loss = 7.5851e-02, PNorm = 61.8346, GNorm = 0.3973, lr_0 = 4.6222e-04
Loss = 8.2503e-02, PNorm = 61.8442, GNorm = 1.3961, lr_0 = 4.6191e-04
Loss = 7.6517e-02, PNorm = 61.8551, GNorm = 0.7428, lr_0 = 4.6159e-04
Loss = 8.6825e-02, PNorm = 61.8681, GNorm = 0.7237, lr_0 = 4.6127e-04
Loss = 8.3254e-02, PNorm = 61.8758, GNorm = 0.7080, lr_0 = 4.6096e-04
Loss = 9.1891e-02, PNorm = 61.8839, GNorm = 0.4445, lr_0 = 4.6064e-04
Loss = 8.0523e-02, PNorm = 61.8961, GNorm = 0.4218, lr_0 = 4.6033e-04
Loss = 8.3592e-02, PNorm = 61.9091, GNorm = 0.5743, lr_0 = 4.6001e-04
Loss = 9.2378e-02, PNorm = 61.9230, GNorm = 0.6430, lr_0 = 4.5970e-04
Loss = 1.0550e-01, PNorm = 61.9381, GNorm = 1.1270, lr_0 = 4.5938e-04
Loss = 9.9458e-02, PNorm = 61.9510, GNorm = 0.7468, lr_0 = 4.5907e-04
Loss = 9.5389e-02, PNorm = 61.9607, GNorm = 0.7577, lr_0 = 4.5875e-04
Loss = 8.7931e-02, PNorm = 61.9742, GNorm = 0.5671, lr_0 = 4.5844e-04
Loss = 8.8644e-02, PNorm = 61.9871, GNorm = 0.6577, lr_0 = 4.5812e-04
Loss = 8.9390e-02, PNorm = 61.9970, GNorm = 0.5772, lr_0 = 4.5781e-04
Loss = 9.4008e-02, PNorm = 62.0076, GNorm = 0.8499, lr_0 = 4.5750e-04
Loss = 8.7917e-02, PNorm = 62.0133, GNorm = 0.6758, lr_0 = 4.5718e-04
Loss = 8.3885e-02, PNorm = 62.0226, GNorm = 1.3103, lr_0 = 4.5687e-04
Loss = 9.7353e-02, PNorm = 62.0342, GNorm = 1.0850, lr_0 = 4.5656e-04
Loss = 9.1085e-02, PNorm = 62.0503, GNorm = 0.7006, lr_0 = 4.5624e-04
Loss = 8.9409e-02, PNorm = 62.0614, GNorm = 0.7447, lr_0 = 4.5593e-04
Loss = 1.0364e-01, PNorm = 62.0745, GNorm = 0.6372, lr_0 = 4.5562e-04
Loss = 1.0009e-01, PNorm = 62.0853, GNorm = 0.5966, lr_0 = 4.5531e-04
Loss = 8.5328e-02, PNorm = 62.0944, GNorm = 0.7720, lr_0 = 4.5499e-04
Loss = 9.2548e-02, PNorm = 62.1052, GNorm = 1.0484, lr_0 = 4.5468e-04
Loss = 8.2536e-02, PNorm = 62.1166, GNorm = 0.6394, lr_0 = 4.5437e-04
Loss = 7.8359e-02, PNorm = 62.1264, GNorm = 0.5642, lr_0 = 4.5406e-04
Loss = 8.6482e-02, PNorm = 62.1359, GNorm = 0.9218, lr_0 = 4.5375e-04
Loss = 8.0527e-02, PNorm = 62.1445, GNorm = 0.5893, lr_0 = 4.5344e-04
Loss = 8.0748e-02, PNorm = 62.1527, GNorm = 0.5349, lr_0 = 4.5313e-04
Loss = 8.1506e-02, PNorm = 62.1627, GNorm = 0.5669, lr_0 = 4.5282e-04
Loss = 8.7822e-02, PNorm = 62.1711, GNorm = 0.7484, lr_0 = 4.5251e-04
Loss = 8.5173e-02, PNorm = 62.1791, GNorm = 0.6602, lr_0 = 4.5220e-04
Loss = 1.0727e-01, PNorm = 62.1897, GNorm = 1.4704, lr_0 = 4.5189e-04
Loss = 9.5595e-02, PNorm = 62.2031, GNorm = 1.4356, lr_0 = 4.5158e-04
Loss = 8.4455e-02, PNorm = 62.2180, GNorm = 0.9705, lr_0 = 4.5127e-04
Loss = 9.4104e-02, PNorm = 62.2313, GNorm = 0.5055, lr_0 = 4.5096e-04
Loss = 8.8527e-02, PNorm = 62.2421, GNorm = 0.5972, lr_0 = 4.5065e-04
Loss = 7.9338e-02, PNorm = 62.2495, GNorm = 0.4990, lr_0 = 4.5034e-04
Loss = 8.0973e-02, PNorm = 62.2595, GNorm = 0.8295, lr_0 = 4.5003e-04
Loss = 9.7882e-02, PNorm = 62.2650, GNorm = 0.7438, lr_0 = 4.4972e-04
Loss = 8.7064e-02, PNorm = 62.2724, GNorm = 0.6949, lr_0 = 4.4942e-04
Loss = 8.8903e-02, PNorm = 62.2871, GNorm = 0.5122, lr_0 = 4.4911e-04
Loss = 1.0146e-01, PNorm = 62.3028, GNorm = 0.7640, lr_0 = 4.4880e-04
Loss = 8.2120e-02, PNorm = 62.3151, GNorm = 0.6092, lr_0 = 4.4849e-04
Loss = 8.2641e-02, PNorm = 62.3269, GNorm = 0.5930, lr_0 = 4.4819e-04
Loss = 8.7627e-02, PNorm = 62.3401, GNorm = 0.4744, lr_0 = 4.4788e-04
Loss = 7.6181e-02, PNorm = 62.3516, GNorm = 0.5146, lr_0 = 4.4757e-04
Loss = 8.8138e-02, PNorm = 62.3645, GNorm = 0.7758, lr_0 = 4.4727e-04
Loss = 9.4727e-02, PNorm = 62.3763, GNorm = 1.2548, lr_0 = 4.4696e-04
Loss = 9.0746e-02, PNorm = 62.3841, GNorm = 1.0863, lr_0 = 4.4665e-04
Loss = 1.0310e-01, PNorm = 62.3943, GNorm = 1.0653, lr_0 = 4.4635e-04
Loss = 8.1054e-02, PNorm = 62.4059, GNorm = 0.9128, lr_0 = 4.4604e-04
Loss = 7.9539e-02, PNorm = 62.4181, GNorm = 0.6391, lr_0 = 4.4574e-04
Loss = 8.8735e-02, PNorm = 62.4245, GNorm = 0.7322, lr_0 = 4.4543e-04
Loss = 8.4742e-02, PNorm = 62.4350, GNorm = 0.8987, lr_0 = 4.4513e-04
Loss = 9.3152e-02, PNorm = 62.4457, GNorm = 0.6365, lr_0 = 4.4482e-04
Loss = 8.2471e-02, PNorm = 62.4582, GNorm = 1.2950, lr_0 = 4.4452e-04
Loss = 8.6564e-02, PNorm = 62.4666, GNorm = 0.7619, lr_0 = 4.4421e-04
Loss = 1.0306e-01, PNorm = 62.4790, GNorm = 0.6433, lr_0 = 4.4391e-04
Loss = 8.4409e-02, PNorm = 62.4937, GNorm = 0.6771, lr_0 = 4.4360e-04
Loss = 7.6952e-02, PNorm = 62.5037, GNorm = 0.7926, lr_0 = 4.4330e-04
Loss = 9.5282e-02, PNorm = 62.5121, GNorm = 0.6161, lr_0 = 4.4299e-04
Loss = 8.7277e-02, PNorm = 62.5208, GNorm = 0.5245, lr_0 = 4.4269e-04
Loss = 9.0272e-02, PNorm = 62.5287, GNorm = 0.6748, lr_0 = 4.4239e-04
Loss = 9.1490e-02, PNorm = 62.5359, GNorm = 0.5202, lr_0 = 4.4209e-04
Loss = 8.3574e-02, PNorm = 62.5426, GNorm = 0.4571, lr_0 = 4.4178e-04
Loss = 7.3611e-02, PNorm = 62.5489, GNorm = 0.5303, lr_0 = 4.4148e-04
Loss = 9.2418e-02, PNorm = 62.5585, GNorm = 1.1539, lr_0 = 4.4118e-04
Loss = 9.6765e-02, PNorm = 62.5685, GNorm = 1.2085, lr_0 = 4.4088e-04
Loss = 8.4756e-02, PNorm = 62.5741, GNorm = 0.5859, lr_0 = 4.4057e-04
Loss = 8.1539e-02, PNorm = 62.5807, GNorm = 0.6039, lr_0 = 4.4027e-04
Loss = 8.2267e-02, PNorm = 62.5924, GNorm = 0.9309, lr_0 = 4.3997e-04
Loss = 9.1084e-02, PNorm = 62.6067, GNorm = 0.5688, lr_0 = 4.3967e-04
Loss = 9.7152e-02, PNorm = 62.6173, GNorm = 0.9711, lr_0 = 4.3937e-04
Validation mae = 0.400735
Epoch 12
Loss = 7.1619e-02, PNorm = 62.6264, GNorm = 0.9645, lr_0 = 4.3907e-04
Loss = 7.7998e-02, PNorm = 62.6366, GNorm = 0.9065, lr_0 = 4.3877e-04
Loss = 6.6661e-02, PNorm = 62.6461, GNorm = 0.5239, lr_0 = 4.3846e-04
Loss = 8.5482e-02, PNorm = 62.6541, GNorm = 0.4968, lr_0 = 4.3816e-04
Loss = 7.4102e-02, PNorm = 62.6622, GNorm = 0.6406, lr_0 = 4.3786e-04
Loss = 9.0126e-02, PNorm = 62.6741, GNorm = 0.7092, lr_0 = 4.3756e-04
Loss = 7.7522e-02, PNorm = 62.6877, GNorm = 0.5566, lr_0 = 4.3726e-04
Loss = 8.0444e-02, PNorm = 62.7007, GNorm = 0.5160, lr_0 = 4.3696e-04
Loss = 7.2800e-02, PNorm = 62.7076, GNorm = 0.8180, lr_0 = 4.3667e-04
Loss = 8.0522e-02, PNorm = 62.7143, GNorm = 0.8660, lr_0 = 4.3637e-04
Loss = 8.2181e-02, PNorm = 62.7266, GNorm = 0.5360, lr_0 = 4.3607e-04
Loss = 6.7376e-02, PNorm = 62.7364, GNorm = 0.5957, lr_0 = 4.3577e-04
Loss = 7.5990e-02, PNorm = 62.7433, GNorm = 0.4032, lr_0 = 4.3547e-04
Loss = 8.0117e-02, PNorm = 62.7515, GNorm = 1.0786, lr_0 = 4.3517e-04
Loss = 7.8003e-02, PNorm = 62.7659, GNorm = 0.7290, lr_0 = 4.3487e-04
Loss = 8.1512e-02, PNorm = 62.7794, GNorm = 0.4175, lr_0 = 4.3458e-04
Loss = 8.2747e-02, PNorm = 62.7923, GNorm = 0.5420, lr_0 = 4.3428e-04
Loss = 7.1381e-02, PNorm = 62.8032, GNorm = 0.7206, lr_0 = 4.3398e-04
Loss = 8.1231e-02, PNorm = 62.8136, GNorm = 0.6918, lr_0 = 4.3368e-04
Loss = 7.8588e-02, PNorm = 62.8267, GNorm = 0.7634, lr_0 = 4.3339e-04
Loss = 8.3287e-02, PNorm = 62.8359, GNorm = 0.8789, lr_0 = 4.3309e-04
Loss = 8.5033e-02, PNorm = 62.8450, GNorm = 1.3117, lr_0 = 4.3279e-04
Loss = 7.3305e-02, PNorm = 62.8541, GNorm = 0.5518, lr_0 = 4.3250e-04
Loss = 7.1880e-02, PNorm = 62.8634, GNorm = 0.5106, lr_0 = 4.3220e-04
Loss = 6.6382e-02, PNorm = 62.8738, GNorm = 0.4905, lr_0 = 4.3190e-04
Loss = 7.7305e-02, PNorm = 62.8835, GNorm = 0.7270, lr_0 = 4.3161e-04
Loss = 8.2449e-02, PNorm = 62.8899, GNorm = 0.5985, lr_0 = 4.3131e-04
Loss = 6.8975e-02, PNorm = 62.8990, GNorm = 0.9781, lr_0 = 4.3102e-04
Loss = 9.4615e-02, PNorm = 62.9105, GNorm = 0.6512, lr_0 = 4.3072e-04
Loss = 7.9716e-02, PNorm = 62.9198, GNorm = 0.5892, lr_0 = 4.3043e-04
Loss = 7.3652e-02, PNorm = 62.9291, GNorm = 0.7269, lr_0 = 4.3013e-04
Loss = 7.8288e-02, PNorm = 62.9382, GNorm = 0.6442, lr_0 = 4.2984e-04
Loss = 7.8059e-02, PNorm = 62.9479, GNorm = 0.5509, lr_0 = 4.2954e-04
Loss = 7.2984e-02, PNorm = 62.9573, GNorm = 0.6683, lr_0 = 4.2925e-04
Loss = 7.7882e-02, PNorm = 62.9703, GNorm = 0.8006, lr_0 = 4.2895e-04
Loss = 8.6011e-02, PNorm = 62.9818, GNorm = 1.4807, lr_0 = 4.2866e-04
Loss = 8.0549e-02, PNorm = 62.9939, GNorm = 0.4617, lr_0 = 4.2837e-04
Loss = 7.8145e-02, PNorm = 63.0090, GNorm = 0.6419, lr_0 = 4.2807e-04
Loss = 8.0016e-02, PNorm = 63.0199, GNorm = 0.8413, lr_0 = 4.2778e-04
Loss = 7.5961e-02, PNorm = 63.0304, GNorm = 0.8894, lr_0 = 4.2749e-04
Loss = 6.8061e-02, PNorm = 63.0385, GNorm = 0.4543, lr_0 = 4.2719e-04
Loss = 7.5036e-02, PNorm = 63.0483, GNorm = 0.5054, lr_0 = 4.2690e-04
Loss = 9.8480e-02, PNorm = 63.0608, GNorm = 0.6298, lr_0 = 4.2661e-04
Loss = 8.1499e-02, PNorm = 63.0673, GNorm = 0.5356, lr_0 = 4.2632e-04
Loss = 7.8471e-02, PNorm = 63.0747, GNorm = 0.6151, lr_0 = 4.2602e-04
Loss = 7.3481e-02, PNorm = 63.0819, GNorm = 0.6148, lr_0 = 4.2573e-04
Loss = 7.2058e-02, PNorm = 63.0940, GNorm = 0.6078, lr_0 = 4.2544e-04
Loss = 7.8266e-02, PNorm = 63.1042, GNorm = 0.5812, lr_0 = 4.2515e-04
Loss = 8.7345e-02, PNorm = 63.1124, GNorm = 0.6533, lr_0 = 4.2486e-04
Loss = 7.8876e-02, PNorm = 63.1212, GNorm = 0.8091, lr_0 = 4.2457e-04
Loss = 8.0091e-02, PNorm = 63.1298, GNorm = 1.1883, lr_0 = 4.2428e-04
Loss = 7.6069e-02, PNorm = 63.1387, GNorm = 0.6769, lr_0 = 4.2399e-04
Loss = 7.2371e-02, PNorm = 63.1472, GNorm = 0.7106, lr_0 = 4.2370e-04
Loss = 7.7560e-02, PNorm = 63.1550, GNorm = 0.4591, lr_0 = 4.2340e-04
Loss = 7.0796e-02, PNorm = 63.1624, GNorm = 0.6221, lr_0 = 4.2311e-04
Loss = 9.4217e-02, PNorm = 63.1714, GNorm = 0.9007, lr_0 = 4.2283e-04
Loss = 8.2339e-02, PNorm = 63.1808, GNorm = 0.5513, lr_0 = 4.2254e-04
Loss = 8.0426e-02, PNorm = 63.1902, GNorm = 0.7264, lr_0 = 4.2225e-04
Loss = 8.3998e-02, PNorm = 63.2032, GNorm = 0.7878, lr_0 = 4.2196e-04
Loss = 7.6866e-02, PNorm = 63.2142, GNorm = 0.6663, lr_0 = 4.2167e-04
Loss = 6.9997e-02, PNorm = 63.2226, GNorm = 0.4459, lr_0 = 4.2138e-04
Loss = 8.9337e-02, PNorm = 63.2321, GNorm = 0.6601, lr_0 = 4.2109e-04
Loss = 7.8859e-02, PNorm = 63.2442, GNorm = 0.6350, lr_0 = 4.2080e-04
Loss = 1.0103e-01, PNorm = 63.2532, GNorm = 0.6615, lr_0 = 4.2051e-04
Loss = 8.9670e-02, PNorm = 63.2628, GNorm = 0.5530, lr_0 = 4.2023e-04
Loss = 7.6787e-02, PNorm = 63.2751, GNorm = 0.9035, lr_0 = 4.1994e-04
Loss = 8.6298e-02, PNorm = 63.2880, GNorm = 1.3086, lr_0 = 4.1965e-04
Loss = 8.9327e-02, PNorm = 63.2988, GNorm = 0.5974, lr_0 = 4.1936e-04
Loss = 6.9710e-02, PNorm = 63.3076, GNorm = 0.4851, lr_0 = 4.1907e-04
Loss = 7.9390e-02, PNorm = 63.3132, GNorm = 0.4643, lr_0 = 4.1879e-04
Loss = 8.9945e-02, PNorm = 63.3231, GNorm = 0.8343, lr_0 = 4.1850e-04
Loss = 6.9527e-02, PNorm = 63.3350, GNorm = 0.6123, lr_0 = 4.1821e-04
Loss = 7.0983e-02, PNorm = 63.3452, GNorm = 0.4801, lr_0 = 4.1793e-04
Loss = 7.4785e-02, PNorm = 63.3516, GNorm = 1.1304, lr_0 = 4.1764e-04
Loss = 7.8327e-02, PNorm = 63.3582, GNorm = 0.6350, lr_0 = 4.1736e-04
Loss = 7.2991e-02, PNorm = 63.3638, GNorm = 0.5579, lr_0 = 4.1707e-04
Loss = 9.3264e-02, PNorm = 63.3688, GNorm = 0.7082, lr_0 = 4.1678e-04
Loss = 8.4417e-02, PNorm = 63.3782, GNorm = 0.5084, lr_0 = 4.1650e-04
Loss = 6.6661e-02, PNorm = 63.3856, GNorm = 0.4313, lr_0 = 4.1621e-04
Loss = 8.0691e-02, PNorm = 63.3932, GNorm = 0.7808, lr_0 = 4.1593e-04
Loss = 9.5067e-02, PNorm = 63.4032, GNorm = 0.7351, lr_0 = 4.1564e-04
Loss = 9.3965e-02, PNorm = 63.4113, GNorm = 0.5759, lr_0 = 4.1536e-04
Loss = 9.1109e-02, PNorm = 63.4180, GNorm = 0.5917, lr_0 = 4.1507e-04
Loss = 8.3628e-02, PNorm = 63.4274, GNorm = 0.5805, lr_0 = 4.1479e-04
Loss = 7.6444e-02, PNorm = 63.4369, GNorm = 0.8049, lr_0 = 4.1450e-04
Loss = 8.1367e-02, PNorm = 63.4475, GNorm = 0.6014, lr_0 = 4.1422e-04
Loss = 8.5868e-02, PNorm = 63.4560, GNorm = 0.6499, lr_0 = 4.1394e-04
Loss = 8.9299e-02, PNorm = 63.4687, GNorm = 0.9342, lr_0 = 4.1365e-04
Loss = 7.9402e-02, PNorm = 63.4797, GNorm = 0.7457, lr_0 = 4.1337e-04
Loss = 6.6963e-02, PNorm = 63.4881, GNorm = 0.6519, lr_0 = 4.1309e-04
Loss = 7.4826e-02, PNorm = 63.4927, GNorm = 0.6268, lr_0 = 4.1280e-04
Loss = 7.5564e-02, PNorm = 63.5002, GNorm = 0.4247, lr_0 = 4.1252e-04
Loss = 9.1481e-02, PNorm = 63.5125, GNorm = 0.6093, lr_0 = 4.1224e-04
Loss = 9.0781e-02, PNorm = 63.5252, GNorm = 0.8079, lr_0 = 4.1196e-04
Loss = 9.4656e-02, PNorm = 63.5337, GNorm = 1.6387, lr_0 = 4.1167e-04
Loss = 9.0652e-02, PNorm = 63.5475, GNorm = 0.6702, lr_0 = 4.1139e-04
Loss = 8.5431e-02, PNorm = 63.5617, GNorm = 0.6068, lr_0 = 4.1111e-04
Loss = 7.4114e-02, PNorm = 63.5710, GNorm = 0.7080, lr_0 = 4.1083e-04
Loss = 7.4919e-02, PNorm = 63.5781, GNorm = 1.2192, lr_0 = 4.1055e-04
Loss = 8.8089e-02, PNorm = 63.5851, GNorm = 0.4683, lr_0 = 4.1027e-04
Loss = 7.7435e-02, PNorm = 63.5939, GNorm = 0.7188, lr_0 = 4.0998e-04
Loss = 7.8871e-02, PNorm = 63.6024, GNorm = 0.7359, lr_0 = 4.0970e-04
Loss = 9.4597e-02, PNorm = 63.6120, GNorm = 0.7122, lr_0 = 4.0942e-04
Loss = 6.8863e-02, PNorm = 63.6183, GNorm = 1.0464, lr_0 = 4.0914e-04
Loss = 8.1854e-02, PNorm = 63.6250, GNorm = 0.7477, lr_0 = 4.0886e-04
Loss = 8.4505e-02, PNorm = 63.6329, GNorm = 1.3579, lr_0 = 4.0858e-04
Loss = 8.1848e-02, PNorm = 63.6388, GNorm = 0.6157, lr_0 = 4.0830e-04
Loss = 7.8338e-02, PNorm = 63.6475, GNorm = 0.6226, lr_0 = 4.0802e-04
Loss = 8.3424e-02, PNorm = 63.6580, GNorm = 0.3785, lr_0 = 4.0774e-04
Loss = 8.2431e-02, PNorm = 63.6687, GNorm = 0.6210, lr_0 = 4.0746e-04
Loss = 8.4134e-02, PNorm = 63.6793, GNorm = 0.4628, lr_0 = 4.0718e-04
Loss = 8.5702e-02, PNorm = 63.6857, GNorm = 1.5077, lr_0 = 4.0691e-04
Loss = 8.4054e-02, PNorm = 63.6926, GNorm = 0.8086, lr_0 = 4.0663e-04
Loss = 8.8773e-02, PNorm = 63.7052, GNorm = 1.0123, lr_0 = 4.0635e-04
Loss = 7.9950e-02, PNorm = 63.7159, GNorm = 0.6686, lr_0 = 4.0607e-04
Loss = 8.4667e-02, PNorm = 63.7236, GNorm = 0.8748, lr_0 = 4.0579e-04
Loss = 8.8766e-02, PNorm = 63.7318, GNorm = 0.6688, lr_0 = 4.0551e-04
Loss = 8.9904e-02, PNorm = 63.7427, GNorm = 0.4974, lr_0 = 4.0524e-04
Loss = 6.7318e-02, PNorm = 63.7514, GNorm = 0.5595, lr_0 = 4.0496e-04
Loss = 6.6665e-02, PNorm = 63.7600, GNorm = 0.5316, lr_0 = 4.0468e-04
Validation mae = 0.399466
Epoch 13
Loss = 7.8744e-02, PNorm = 63.7678, GNorm = 0.5283, lr_0 = 4.0440e-04
Loss = 5.9520e-02, PNorm = 63.7747, GNorm = 0.6418, lr_0 = 4.0413e-04
Loss = 6.7351e-02, PNorm = 63.7811, GNorm = 0.4859, lr_0 = 4.0385e-04
Loss = 6.0211e-02, PNorm = 63.7900, GNorm = 0.5933, lr_0 = 4.0357e-04
Loss = 6.8321e-02, PNorm = 63.8027, GNorm = 0.7180, lr_0 = 4.0330e-04
Loss = 6.6178e-02, PNorm = 63.8120, GNorm = 0.5515, lr_0 = 4.0302e-04
Loss = 7.0799e-02, PNorm = 63.8215, GNorm = 1.0241, lr_0 = 4.0274e-04
Loss = 7.7683e-02, PNorm = 63.8303, GNorm = 0.8001, lr_0 = 4.0247e-04
Loss = 7.8439e-02, PNorm = 63.8374, GNorm = 1.0866, lr_0 = 4.0219e-04
Loss = 7.3725e-02, PNorm = 63.8443, GNorm = 0.7282, lr_0 = 4.0192e-04
Loss = 7.1968e-02, PNorm = 63.8520, GNorm = 0.3465, lr_0 = 4.0164e-04
Loss = 6.9139e-02, PNorm = 63.8632, GNorm = 0.4975, lr_0 = 4.0137e-04
Loss = 8.0009e-02, PNorm = 63.8722, GNorm = 0.5616, lr_0 = 4.0109e-04
Loss = 7.5506e-02, PNorm = 63.8790, GNorm = 0.6704, lr_0 = 4.0082e-04
Loss = 6.8529e-02, PNorm = 63.8882, GNorm = 0.6042, lr_0 = 4.0054e-04
Loss = 7.0948e-02, PNorm = 63.9003, GNorm = 1.4055, lr_0 = 4.0027e-04
Loss = 7.6027e-02, PNorm = 63.9112, GNorm = 0.6037, lr_0 = 3.9999e-04
Loss = 6.6647e-02, PNorm = 63.9203, GNorm = 0.5783, lr_0 = 3.9972e-04
Loss = 6.8663e-02, PNorm = 63.9297, GNorm = 0.8081, lr_0 = 3.9945e-04
Loss = 7.4177e-02, PNorm = 63.9371, GNorm = 0.4300, lr_0 = 3.9917e-04
Loss = 8.5110e-02, PNorm = 63.9478, GNorm = 0.6094, lr_0 = 3.9890e-04
Loss = 7.1140e-02, PNorm = 63.9573, GNorm = 0.4959, lr_0 = 3.9863e-04
Loss = 6.9060e-02, PNorm = 63.9636, GNorm = 0.5149, lr_0 = 3.9835e-04
Loss = 7.4473e-02, PNorm = 63.9723, GNorm = 0.4765, lr_0 = 3.9808e-04
Loss = 6.9941e-02, PNorm = 63.9806, GNorm = 0.4878, lr_0 = 3.9781e-04
Loss = 8.4205e-02, PNorm = 63.9857, GNorm = 1.3196, lr_0 = 3.9753e-04
Loss = 6.9908e-02, PNorm = 63.9917, GNorm = 0.4736, lr_0 = 3.9726e-04
Loss = 6.3420e-02, PNorm = 64.0015, GNorm = 0.5398, lr_0 = 3.9699e-04
Loss = 7.6635e-02, PNorm = 64.0140, GNorm = 0.5410, lr_0 = 3.9672e-04
Loss = 6.8257e-02, PNorm = 64.0228, GNorm = 0.7275, lr_0 = 3.9645e-04
Loss = 7.6266e-02, PNorm = 64.0353, GNorm = 0.8903, lr_0 = 3.9617e-04
Loss = 6.5775e-02, PNorm = 64.0440, GNorm = 0.4524, lr_0 = 3.9590e-04
Loss = 7.8224e-02, PNorm = 64.0515, GNorm = 0.6457, lr_0 = 3.9563e-04
Loss = 7.3014e-02, PNorm = 64.0595, GNorm = 0.6837, lr_0 = 3.9536e-04
Loss = 8.1269e-02, PNorm = 64.0678, GNorm = 0.5797, lr_0 = 3.9509e-04
Loss = 7.2599e-02, PNorm = 64.0755, GNorm = 0.6578, lr_0 = 3.9482e-04
Loss = 7.9505e-02, PNorm = 64.0819, GNorm = 0.7816, lr_0 = 3.9455e-04
Loss = 7.3467e-02, PNorm = 64.0910, GNorm = 0.5800, lr_0 = 3.9428e-04
Loss = 8.4306e-02, PNorm = 64.0999, GNorm = 1.3705, lr_0 = 3.9401e-04
Loss = 7.5601e-02, PNorm = 64.1086, GNorm = 0.5789, lr_0 = 3.9374e-04
Loss = 7.5733e-02, PNorm = 64.1160, GNorm = 0.5786, lr_0 = 3.9347e-04
Loss = 7.6380e-02, PNorm = 64.1276, GNorm = 0.5564, lr_0 = 3.9320e-04
Loss = 7.1168e-02, PNorm = 64.1440, GNorm = 0.5401, lr_0 = 3.9293e-04
Loss = 7.4062e-02, PNorm = 64.1542, GNorm = 0.6057, lr_0 = 3.9266e-04
Loss = 7.8573e-02, PNorm = 64.1605, GNorm = 0.4939, lr_0 = 3.9239e-04
Loss = 7.2760e-02, PNorm = 64.1645, GNorm = 0.5357, lr_0 = 3.9212e-04
Loss = 7.5429e-02, PNorm = 64.1724, GNorm = 0.5359, lr_0 = 3.9185e-04
Loss = 6.7918e-02, PNorm = 64.1792, GNorm = 0.4258, lr_0 = 3.9159e-04
Loss = 8.3255e-02, PNorm = 64.1869, GNorm = 0.9846, lr_0 = 3.9132e-04
Loss = 7.0461e-02, PNorm = 64.1955, GNorm = 0.6520, lr_0 = 3.9105e-04
Loss = 7.3237e-02, PNorm = 64.2029, GNorm = 0.4697, lr_0 = 3.9078e-04
Loss = 7.3345e-02, PNorm = 64.2090, GNorm = 0.9136, lr_0 = 3.9051e-04
Loss = 8.0389e-02, PNorm = 64.2172, GNorm = 0.6007, lr_0 = 3.9025e-04
Loss = 7.0504e-02, PNorm = 64.2280, GNorm = 1.0042, lr_0 = 3.8998e-04
Loss = 7.2990e-02, PNorm = 64.2389, GNorm = 0.8520, lr_0 = 3.8971e-04
Loss = 7.3526e-02, PNorm = 64.2464, GNorm = 0.5585, lr_0 = 3.8945e-04
Loss = 7.0739e-02, PNorm = 64.2535, GNorm = 0.5657, lr_0 = 3.8918e-04
Loss = 8.5722e-02, PNorm = 64.2632, GNorm = 0.4516, lr_0 = 3.8891e-04
Loss = 7.5812e-02, PNorm = 64.2700, GNorm = 0.5748, lr_0 = 3.8865e-04
Loss = 6.8763e-02, PNorm = 64.2756, GNorm = 1.0685, lr_0 = 3.8838e-04
Loss = 6.4787e-02, PNorm = 64.2785, GNorm = 0.5208, lr_0 = 3.8811e-04
Loss = 7.5842e-02, PNorm = 64.2871, GNorm = 0.6727, lr_0 = 3.8785e-04
Loss = 7.0240e-02, PNorm = 64.2958, GNorm = 0.4830, lr_0 = 3.8758e-04
Loss = 7.9513e-02, PNorm = 64.3024, GNorm = 0.5681, lr_0 = 3.8732e-04
Loss = 6.6239e-02, PNorm = 64.3093, GNorm = 0.4734, lr_0 = 3.8705e-04
Loss = 7.2865e-02, PNorm = 64.3165, GNorm = 0.4276, lr_0 = 3.8679e-04
Loss = 7.2879e-02, PNorm = 64.3215, GNorm = 0.5982, lr_0 = 3.8652e-04
Loss = 7.0386e-02, PNorm = 64.3293, GNorm = 0.5625, lr_0 = 3.8626e-04
Loss = 8.0573e-02, PNorm = 64.3373, GNorm = 0.6532, lr_0 = 3.8599e-04
Loss = 7.7037e-02, PNorm = 64.3448, GNorm = 0.5028, lr_0 = 3.8573e-04
Loss = 7.3248e-02, PNorm = 64.3533, GNorm = 0.6425, lr_0 = 3.8546e-04
Loss = 7.9147e-02, PNorm = 64.3645, GNorm = 0.6564, lr_0 = 3.8520e-04
Loss = 7.6349e-02, PNorm = 64.3728, GNorm = 0.5524, lr_0 = 3.8493e-04
Loss = 6.5027e-02, PNorm = 64.3800, GNorm = 0.8090, lr_0 = 3.8467e-04
Loss = 7.9340e-02, PNorm = 64.3880, GNorm = 0.7174, lr_0 = 3.8441e-04
Loss = 7.1366e-02, PNorm = 64.3943, GNorm = 0.4966, lr_0 = 3.8414e-04
Loss = 6.9008e-02, PNorm = 64.3975, GNorm = 0.6244, lr_0 = 3.8388e-04
Loss = 8.3634e-02, PNorm = 64.4045, GNorm = 0.4740, lr_0 = 3.8362e-04
Loss = 6.7145e-02, PNorm = 64.4134, GNorm = 0.6499, lr_0 = 3.8336e-04
Loss = 7.5310e-02, PNorm = 64.4218, GNorm = 0.9190, lr_0 = 3.8309e-04
Loss = 7.8697e-02, PNorm = 64.4328, GNorm = 0.5853, lr_0 = 3.8283e-04
Loss = 7.7682e-02, PNorm = 64.4439, GNorm = 0.6023, lr_0 = 3.8257e-04
Loss = 7.5839e-02, PNorm = 64.4524, GNorm = 1.0725, lr_0 = 3.8231e-04
Loss = 6.9431e-02, PNorm = 64.4591, GNorm = 0.5096, lr_0 = 3.8204e-04
Loss = 7.0684e-02, PNorm = 64.4684, GNorm = 0.9888, lr_0 = 3.8178e-04
Loss = 7.9270e-02, PNorm = 64.4772, GNorm = 0.8558, lr_0 = 3.8152e-04
Loss = 7.6015e-02, PNorm = 64.4839, GNorm = 0.7012, lr_0 = 3.8126e-04
Loss = 7.8378e-02, PNorm = 64.4930, GNorm = 0.6441, lr_0 = 3.8100e-04
Loss = 7.8914e-02, PNorm = 64.4997, GNorm = 0.7045, lr_0 = 3.8074e-04
Loss = 7.1125e-02, PNorm = 64.5078, GNorm = 0.5682, lr_0 = 3.8048e-04
Loss = 7.4621e-02, PNorm = 64.5141, GNorm = 0.5662, lr_0 = 3.8022e-04
Loss = 7.5760e-02, PNorm = 64.5209, GNorm = 0.7417, lr_0 = 3.7995e-04
Loss = 8.2670e-02, PNorm = 64.5311, GNorm = 1.2857, lr_0 = 3.7969e-04
Loss = 7.9734e-02, PNorm = 64.5429, GNorm = 0.9610, lr_0 = 3.7943e-04
Loss = 7.9452e-02, PNorm = 64.5518, GNorm = 0.5516, lr_0 = 3.7917e-04
Loss = 7.6364e-02, PNorm = 64.5596, GNorm = 0.5610, lr_0 = 3.7891e-04
Loss = 8.8467e-02, PNorm = 64.5668, GNorm = 0.6971, lr_0 = 3.7866e-04
Loss = 8.0817e-02, PNorm = 64.5740, GNorm = 0.6924, lr_0 = 3.7840e-04
Loss = 7.4412e-02, PNorm = 64.5843, GNorm = 0.6392, lr_0 = 3.7814e-04
Loss = 7.9159e-02, PNorm = 64.5965, GNorm = 0.4711, lr_0 = 3.7788e-04
Loss = 7.5809e-02, PNorm = 64.6051, GNorm = 0.7401, lr_0 = 3.7762e-04
Loss = 6.7713e-02, PNorm = 64.6118, GNorm = 0.7392, lr_0 = 3.7736e-04
Loss = 8.4248e-02, PNorm = 64.6167, GNorm = 0.5835, lr_0 = 3.7710e-04
Loss = 8.4902e-02, PNorm = 64.6238, GNorm = 1.4213, lr_0 = 3.7684e-04
Loss = 8.6060e-02, PNorm = 64.6355, GNorm = 0.6193, lr_0 = 3.7659e-04
Loss = 8.1374e-02, PNorm = 64.6465, GNorm = 0.6001, lr_0 = 3.7633e-04
Loss = 7.7250e-02, PNorm = 64.6566, GNorm = 1.0273, lr_0 = 3.7607e-04
Loss = 8.3536e-02, PNorm = 64.6646, GNorm = 0.5821, lr_0 = 3.7581e-04
Loss = 7.5448e-02, PNorm = 64.6699, GNorm = 0.8803, lr_0 = 3.7555e-04
Loss = 8.4895e-02, PNorm = 64.6772, GNorm = 0.7987, lr_0 = 3.7530e-04
Loss = 7.4550e-02, PNorm = 64.6842, GNorm = 0.5881, lr_0 = 3.7504e-04
Loss = 7.1111e-02, PNorm = 64.6920, GNorm = 0.5676, lr_0 = 3.7478e-04
Loss = 9.0710e-02, PNorm = 64.6999, GNorm = 0.8563, lr_0 = 3.7453e-04
Loss = 7.2936e-02, PNorm = 64.7085, GNorm = 0.5474, lr_0 = 3.7427e-04
Loss = 8.8830e-02, PNorm = 64.7167, GNorm = 0.6206, lr_0 = 3.7401e-04
Loss = 8.9189e-02, PNorm = 64.7257, GNorm = 0.6643, lr_0 = 3.7376e-04
Loss = 6.6820e-02, PNorm = 64.7342, GNorm = 0.8324, lr_0 = 3.7350e-04
Loss = 8.7280e-02, PNorm = 64.7402, GNorm = 0.7085, lr_0 = 3.7325e-04
Loss = 7.3451e-02, PNorm = 64.7474, GNorm = 0.5093, lr_0 = 3.7299e-04
Loss = 7.2280e-02, PNorm = 64.7552, GNorm = 0.5743, lr_0 = 3.7273e-04
Validation mae = 0.391762
Epoch 14
Loss = 6.3413e-02, PNorm = 64.7668, GNorm = 0.5619, lr_0 = 3.7248e-04
Loss = 6.9784e-02, PNorm = 64.7767, GNorm = 0.4391, lr_0 = 3.7222e-04
Loss = 6.2865e-02, PNorm = 64.7838, GNorm = 0.4903, lr_0 = 3.7197e-04
Loss = 7.2422e-02, PNorm = 64.7936, GNorm = 0.4097, lr_0 = 3.7171e-04
Loss = 6.3632e-02, PNorm = 64.8021, GNorm = 0.4735, lr_0 = 3.7146e-04
Loss = 6.2645e-02, PNorm = 64.8091, GNorm = 0.6153, lr_0 = 3.7120e-04
Loss = 6.2297e-02, PNorm = 64.8148, GNorm = 0.4955, lr_0 = 3.7095e-04
Loss = 7.3589e-02, PNorm = 64.8230, GNorm = 0.7069, lr_0 = 3.7070e-04
Loss = 6.2056e-02, PNorm = 64.8321, GNorm = 0.8503, lr_0 = 3.7044e-04
Loss = 7.4623e-02, PNorm = 64.8438, GNorm = 0.5605, lr_0 = 3.7019e-04
Loss = 7.0710e-02, PNorm = 64.8542, GNorm = 0.6939, lr_0 = 3.6993e-04
Loss = 7.2015e-02, PNorm = 64.8648, GNorm = 0.7394, lr_0 = 3.6968e-04
Loss = 5.8649e-02, PNorm = 64.8739, GNorm = 0.6728, lr_0 = 3.6943e-04
Loss = 6.3053e-02, PNorm = 64.8802, GNorm = 0.5342, lr_0 = 3.6917e-04
Loss = 6.1462e-02, PNorm = 64.8893, GNorm = 0.7207, lr_0 = 3.6892e-04
Loss = 5.7857e-02, PNorm = 64.8987, GNorm = 0.5206, lr_0 = 3.6867e-04
Loss = 6.8166e-02, PNorm = 64.9082, GNorm = 0.5805, lr_0 = 3.6842e-04
Loss = 6.3849e-02, PNorm = 64.9140, GNorm = 0.3831, lr_0 = 3.6816e-04
Loss = 6.4530e-02, PNorm = 64.9203, GNorm = 0.6128, lr_0 = 3.6791e-04
Loss = 6.3986e-02, PNorm = 64.9301, GNorm = 0.6503, lr_0 = 3.6766e-04
Loss = 6.6572e-02, PNorm = 64.9391, GNorm = 0.5647, lr_0 = 3.6741e-04
Loss = 6.7563e-02, PNorm = 64.9485, GNorm = 0.4645, lr_0 = 3.6716e-04
Loss = 6.3455e-02, PNorm = 64.9571, GNorm = 0.5085, lr_0 = 3.6690e-04
Loss = 6.8431e-02, PNorm = 64.9617, GNorm = 0.6563, lr_0 = 3.6665e-04
Loss = 7.3544e-02, PNorm = 64.9668, GNorm = 0.5557, lr_0 = 3.6640e-04
Loss = 6.4951e-02, PNorm = 64.9725, GNorm = 0.5472, lr_0 = 3.6615e-04
Loss = 8.3319e-02, PNorm = 64.9777, GNorm = 0.9889, lr_0 = 3.6590e-04
Loss = 7.2626e-02, PNorm = 64.9866, GNorm = 0.5752, lr_0 = 3.6565e-04
Loss = 8.2295e-02, PNorm = 64.9992, GNorm = 0.4982, lr_0 = 3.6540e-04
Loss = 8.5681e-02, PNorm = 65.0109, GNorm = 0.5180, lr_0 = 3.6515e-04
Loss = 8.9090e-02, PNorm = 65.0210, GNorm = 0.4699, lr_0 = 3.6490e-04
Loss = 6.1274e-02, PNorm = 65.0310, GNorm = 0.8631, lr_0 = 3.6465e-04
Loss = 7.4293e-02, PNorm = 65.0374, GNorm = 0.5234, lr_0 = 3.6440e-04
Loss = 8.1775e-02, PNorm = 65.0450, GNorm = 0.5534, lr_0 = 3.6415e-04
Loss = 7.1262e-02, PNorm = 65.0547, GNorm = 1.1231, lr_0 = 3.6390e-04
Loss = 6.9609e-02, PNorm = 65.0646, GNorm = 0.5273, lr_0 = 3.6365e-04
Loss = 5.9442e-02, PNorm = 65.0728, GNorm = 0.6269, lr_0 = 3.6340e-04
Loss = 6.4395e-02, PNorm = 65.0788, GNorm = 0.4308, lr_0 = 3.6315e-04
Loss = 8.0707e-02, PNorm = 65.0879, GNorm = 0.4891, lr_0 = 3.6290e-04
Loss = 6.7122e-02, PNorm = 65.0967, GNorm = 0.5267, lr_0 = 3.6266e-04
Loss = 5.4177e-02, PNorm = 65.1042, GNorm = 0.4737, lr_0 = 3.6241e-04
Loss = 7.9892e-02, PNorm = 65.1100, GNorm = 1.3300, lr_0 = 3.6216e-04
Loss = 7.2144e-02, PNorm = 65.1182, GNorm = 0.7093, lr_0 = 3.6191e-04
Loss = 6.7279e-02, PNorm = 65.1279, GNorm = 0.7721, lr_0 = 3.6166e-04
Loss = 6.1700e-02, PNorm = 65.1385, GNorm = 0.8463, lr_0 = 3.6141e-04
Loss = 6.4900e-02, PNorm = 65.1411, GNorm = 0.5673, lr_0 = 3.6117e-04
Loss = 7.5863e-02, PNorm = 65.1453, GNorm = 0.9126, lr_0 = 3.6092e-04
Loss = 6.2321e-02, PNorm = 65.1528, GNorm = 0.5782, lr_0 = 3.6067e-04
Loss = 7.1632e-02, PNorm = 65.1607, GNorm = 0.6141, lr_0 = 3.6043e-04
Loss = 8.4505e-02, PNorm = 65.1719, GNorm = 0.9341, lr_0 = 3.6018e-04
Loss = 8.2343e-02, PNorm = 65.1836, GNorm = 1.1157, lr_0 = 3.5993e-04
Loss = 6.7241e-02, PNorm = 65.1944, GNorm = 0.6730, lr_0 = 3.5969e-04
Loss = 6.4037e-02, PNorm = 65.2036, GNorm = 0.5753, lr_0 = 3.5944e-04
Loss = 8.4686e-02, PNorm = 65.2115, GNorm = 0.8570, lr_0 = 3.5919e-04
Loss = 7.7468e-02, PNorm = 65.2235, GNorm = 0.6811, lr_0 = 3.5895e-04
Loss = 7.5443e-02, PNorm = 65.2331, GNorm = 0.8182, lr_0 = 3.5870e-04
Loss = 6.8556e-02, PNorm = 65.2379, GNorm = 0.7977, lr_0 = 3.5845e-04
Loss = 7.8588e-02, PNorm = 65.2447, GNorm = 0.7610, lr_0 = 3.5821e-04
Loss = 7.7777e-02, PNorm = 65.2550, GNorm = 0.7309, lr_0 = 3.5796e-04
Loss = 6.6785e-02, PNorm = 65.2605, GNorm = 0.5045, lr_0 = 3.5772e-04
Loss = 6.2577e-02, PNorm = 65.2661, GNorm = 0.4966, lr_0 = 3.5747e-04
Loss = 7.0282e-02, PNorm = 65.2723, GNorm = 0.5722, lr_0 = 3.5723e-04
Loss = 6.1913e-02, PNorm = 65.2778, GNorm = 0.4596, lr_0 = 3.5698e-04
Loss = 6.7793e-02, PNorm = 65.2823, GNorm = 0.5146, lr_0 = 3.5674e-04
Loss = 6.7145e-02, PNorm = 65.2903, GNorm = 0.5941, lr_0 = 3.5650e-04
Loss = 7.4583e-02, PNorm = 65.2950, GNorm = 0.7825, lr_0 = 3.5625e-04
Loss = 7.8193e-02, PNorm = 65.3023, GNorm = 0.8909, lr_0 = 3.5601e-04
Loss = 7.6719e-02, PNorm = 65.3097, GNorm = 0.7228, lr_0 = 3.5576e-04
Loss = 8.9682e-02, PNorm = 65.3150, GNorm = 0.7639, lr_0 = 3.5552e-04
Loss = 6.5340e-02, PNorm = 65.3247, GNorm = 0.6482, lr_0 = 3.5528e-04
Loss = 6.3839e-02, PNorm = 65.3292, GNorm = 0.7117, lr_0 = 3.5503e-04
Loss = 6.5388e-02, PNorm = 65.3338, GNorm = 0.4694, lr_0 = 3.5479e-04
Loss = 7.6783e-02, PNorm = 65.3376, GNorm = 0.8785, lr_0 = 3.5455e-04
Loss = 7.8653e-02, PNorm = 65.3467, GNorm = 0.5638, lr_0 = 3.5430e-04
Loss = 7.3613e-02, PNorm = 65.3558, GNorm = 0.6437, lr_0 = 3.5406e-04
Loss = 7.4858e-02, PNorm = 65.3667, GNorm = 0.6797, lr_0 = 3.5382e-04
Loss = 6.5021e-02, PNorm = 65.3774, GNorm = 0.6080, lr_0 = 3.5358e-04
Loss = 6.6678e-02, PNorm = 65.3862, GNorm = 0.7840, lr_0 = 3.5333e-04
Loss = 8.4164e-02, PNorm = 65.3950, GNorm = 0.6563, lr_0 = 3.5309e-04
Loss = 7.6234e-02, PNorm = 65.4039, GNorm = 0.8850, lr_0 = 3.5285e-04
Loss = 7.1390e-02, PNorm = 65.4087, GNorm = 0.6567, lr_0 = 3.5261e-04
Loss = 7.6133e-02, PNorm = 65.4155, GNorm = 0.3552, lr_0 = 3.5237e-04
Loss = 8.1251e-02, PNorm = 65.4250, GNorm = 0.5625, lr_0 = 3.5212e-04
Loss = 7.4500e-02, PNorm = 65.4324, GNorm = 0.7319, lr_0 = 3.5188e-04
Loss = 6.7845e-02, PNorm = 65.4395, GNorm = 0.8622, lr_0 = 3.5164e-04
Loss = 6.8793e-02, PNorm = 65.4454, GNorm = 0.6870, lr_0 = 3.5140e-04
Loss = 6.3770e-02, PNorm = 65.4509, GNorm = 0.5064, lr_0 = 3.5116e-04
Loss = 6.4712e-02, PNorm = 65.4578, GNorm = 0.4695, lr_0 = 3.5092e-04
Loss = 6.5522e-02, PNorm = 65.4669, GNorm = 0.4520, lr_0 = 3.5068e-04
Loss = 6.4442e-02, PNorm = 65.4755, GNorm = 0.7939, lr_0 = 3.5044e-04
Loss = 5.6549e-02, PNorm = 65.4831, GNorm = 0.3662, lr_0 = 3.5020e-04
Loss = 7.0766e-02, PNorm = 65.4900, GNorm = 0.8208, lr_0 = 3.4996e-04
Loss = 6.5041e-02, PNorm = 65.4940, GNorm = 0.5587, lr_0 = 3.4972e-04
Loss = 7.6634e-02, PNorm = 65.4982, GNorm = 0.5378, lr_0 = 3.4948e-04
Loss = 6.3076e-02, PNorm = 65.5067, GNorm = 0.5241, lr_0 = 3.4924e-04
Loss = 8.3874e-02, PNorm = 65.5127, GNorm = 0.8016, lr_0 = 3.4900e-04
Loss = 7.2541e-02, PNorm = 65.5182, GNorm = 0.6567, lr_0 = 3.4876e-04
Loss = 6.8994e-02, PNorm = 65.5222, GNorm = 1.1648, lr_0 = 3.4852e-04
Loss = 6.2880e-02, PNorm = 65.5273, GNorm = 0.5522, lr_0 = 3.4828e-04
Loss = 6.3312e-02, PNorm = 65.5332, GNorm = 0.5244, lr_0 = 3.4805e-04
Loss = 6.3777e-02, PNorm = 65.5400, GNorm = 0.8054, lr_0 = 3.4781e-04
Loss = 6.1972e-02, PNorm = 65.5438, GNorm = 0.7135, lr_0 = 3.4757e-04
Loss = 7.3846e-02, PNorm = 65.5492, GNorm = 0.5137, lr_0 = 3.4733e-04
Loss = 7.0255e-02, PNorm = 65.5563, GNorm = 0.6130, lr_0 = 3.4709e-04
Loss = 7.1982e-02, PNorm = 65.5657, GNorm = 0.5379, lr_0 = 3.4686e-04
Loss = 6.2223e-02, PNorm = 65.5719, GNorm = 0.8470, lr_0 = 3.4662e-04
Loss = 7.3661e-02, PNorm = 65.5770, GNorm = 0.6181, lr_0 = 3.4638e-04
Loss = 7.1217e-02, PNorm = 65.5799, GNorm = 0.5161, lr_0 = 3.4614e-04
Loss = 6.1181e-02, PNorm = 65.5873, GNorm = 0.4574, lr_0 = 3.4591e-04
Loss = 6.9264e-02, PNorm = 65.5962, GNorm = 0.5759, lr_0 = 3.4567e-04
Loss = 7.0432e-02, PNorm = 65.6038, GNorm = 0.8056, lr_0 = 3.4543e-04
Loss = 6.7168e-02, PNorm = 65.6096, GNorm = 0.6314, lr_0 = 3.4520e-04
Loss = 6.5158e-02, PNorm = 65.6173, GNorm = 0.7021, lr_0 = 3.4496e-04
Loss = 7.4062e-02, PNorm = 65.6261, GNorm = 0.7018, lr_0 = 3.4472e-04
Loss = 5.8313e-02, PNorm = 65.6305, GNorm = 0.4331, lr_0 = 3.4449e-04
Loss = 7.3994e-02, PNorm = 65.6347, GNorm = 0.5514, lr_0 = 3.4425e-04
Loss = 6.8907e-02, PNorm = 65.6395, GNorm = 0.5924, lr_0 = 3.4402e-04
Loss = 7.1607e-02, PNorm = 65.6470, GNorm = 1.0767, lr_0 = 3.4378e-04
Loss = 7.2278e-02, PNorm = 65.6527, GNorm = 0.6152, lr_0 = 3.4354e-04
Loss = 6.8651e-02, PNorm = 65.6589, GNorm = 0.7497, lr_0 = 3.4331e-04
Validation mae = 0.390788
Epoch 15
Loss = 5.7993e-02, PNorm = 65.6644, GNorm = 0.5963, lr_0 = 3.4307e-04
Loss = 5.4281e-02, PNorm = 65.6704, GNorm = 0.5180, lr_0 = 3.4284e-04
Loss = 6.2556e-02, PNorm = 65.6774, GNorm = 0.5455, lr_0 = 3.4260e-04
Loss = 6.8273e-02, PNorm = 65.6845, GNorm = 0.6020, lr_0 = 3.4237e-04
Loss = 6.3501e-02, PNorm = 65.6895, GNorm = 0.5895, lr_0 = 3.4213e-04
Loss = 6.0993e-02, PNorm = 65.6946, GNorm = 0.3226, lr_0 = 3.4190e-04
Loss = 5.8085e-02, PNorm = 65.7014, GNorm = 0.5024, lr_0 = 3.4167e-04
Loss = 7.4804e-02, PNorm = 65.7094, GNorm = 0.5804, lr_0 = 3.4143e-04
Loss = 5.8803e-02, PNorm = 65.7163, GNorm = 0.6702, lr_0 = 3.4120e-04
Loss = 5.7820e-02, PNorm = 65.7212, GNorm = 0.3700, lr_0 = 3.4096e-04
Loss = 6.8794e-02, PNorm = 65.7283, GNorm = 0.5696, lr_0 = 3.4073e-04
Loss = 5.8115e-02, PNorm = 65.7375, GNorm = 0.5391, lr_0 = 3.4050e-04
Loss = 6.0195e-02, PNorm = 65.7455, GNorm = 0.6386, lr_0 = 3.4026e-04
Loss = 7.0487e-02, PNorm = 65.7513, GNorm = 0.4807, lr_0 = 3.4003e-04
Loss = 6.0187e-02, PNorm = 65.7615, GNorm = 0.4406, lr_0 = 3.3980e-04
Loss = 7.2856e-02, PNorm = 65.7688, GNorm = 0.6453, lr_0 = 3.3956e-04
Loss = 6.2033e-02, PNorm = 65.7794, GNorm = 0.6044, lr_0 = 3.3933e-04
Loss = 7.6286e-02, PNorm = 65.7887, GNorm = 1.0316, lr_0 = 3.3910e-04
Loss = 6.3555e-02, PNorm = 65.7967, GNorm = 0.9064, lr_0 = 3.3887e-04
Loss = 6.2018e-02, PNorm = 65.8033, GNorm = 0.5321, lr_0 = 3.3864e-04
Loss = 7.1791e-02, PNorm = 65.8084, GNorm = 0.6801, lr_0 = 3.3840e-04
Loss = 5.9140e-02, PNorm = 65.8148, GNorm = 0.8457, lr_0 = 3.3817e-04
Loss = 6.4801e-02, PNorm = 65.8207, GNorm = 0.6743, lr_0 = 3.3794e-04
Loss = 5.5476e-02, PNorm = 65.8282, GNorm = 0.4727, lr_0 = 3.3771e-04
Loss = 7.4769e-02, PNorm = 65.8357, GNorm = 1.0259, lr_0 = 3.3748e-04
Loss = 7.3844e-02, PNorm = 65.8448, GNorm = 0.5068, lr_0 = 3.3725e-04
Loss = 5.9726e-02, PNorm = 65.8541, GNorm = 0.4474, lr_0 = 3.3701e-04
Loss = 5.7483e-02, PNorm = 65.8599, GNorm = 0.6363, lr_0 = 3.3678e-04
Loss = 6.9019e-02, PNorm = 65.8649, GNorm = 0.4628, lr_0 = 3.3655e-04
Loss = 5.1008e-02, PNorm = 65.8685, GNorm = 0.3407, lr_0 = 3.3632e-04
Loss = 5.6223e-02, PNorm = 65.8695, GNorm = 0.7516, lr_0 = 3.3609e-04
Loss = 6.2754e-02, PNorm = 65.8730, GNorm = 0.4972, lr_0 = 3.3586e-04
Loss = 5.8094e-02, PNorm = 65.8804, GNorm = 0.4741, lr_0 = 3.3563e-04
Loss = 5.6033e-02, PNorm = 65.8889, GNorm = 0.7926, lr_0 = 3.3540e-04
Loss = 7.1291e-02, PNorm = 65.8970, GNorm = 0.5538, lr_0 = 3.3517e-04
Loss = 6.7055e-02, PNorm = 65.9050, GNorm = 0.7951, lr_0 = 3.3494e-04
Loss = 7.0871e-02, PNorm = 65.9126, GNorm = 0.6758, lr_0 = 3.3471e-04
Loss = 5.9478e-02, PNorm = 65.9197, GNorm = 0.4239, lr_0 = 3.3448e-04
Loss = 6.2470e-02, PNorm = 65.9275, GNorm = 0.4715, lr_0 = 3.3425e-04
Loss = 6.3443e-02, PNorm = 65.9349, GNorm = 0.7287, lr_0 = 3.3403e-04
Loss = 6.4900e-02, PNorm = 65.9402, GNorm = 0.6013, lr_0 = 3.3380e-04
Loss = 7.2788e-02, PNorm = 65.9479, GNorm = 0.6354, lr_0 = 3.3357e-04
Loss = 7.0035e-02, PNorm = 65.9554, GNorm = 0.7740, lr_0 = 3.3334e-04
Loss = 5.7554e-02, PNorm = 65.9614, GNorm = 0.6145, lr_0 = 3.3311e-04
Loss = 6.6334e-02, PNorm = 65.9675, GNorm = 0.8885, lr_0 = 3.3288e-04
Loss = 6.9472e-02, PNorm = 65.9730, GNorm = 0.7013, lr_0 = 3.3265e-04
Loss = 7.3121e-02, PNorm = 65.9762, GNorm = 0.7036, lr_0 = 3.3243e-04
Loss = 6.8243e-02, PNorm = 65.9831, GNorm = 0.6166, lr_0 = 3.3220e-04
Loss = 6.1082e-02, PNorm = 65.9906, GNorm = 0.4608, lr_0 = 3.3197e-04
Loss = 5.7093e-02, PNorm = 65.9989, GNorm = 0.5795, lr_0 = 3.3174e-04
Loss = 7.5306e-02, PNorm = 66.0075, GNorm = 0.8346, lr_0 = 3.3152e-04
Loss = 6.5080e-02, PNorm = 66.0152, GNorm = 0.5607, lr_0 = 3.3129e-04
Loss = 6.8305e-02, PNorm = 66.0234, GNorm = 0.5487, lr_0 = 3.3106e-04
Loss = 5.9813e-02, PNorm = 66.0312, GNorm = 0.6626, lr_0 = 3.3084e-04
Loss = 5.7228e-02, PNorm = 66.0373, GNorm = 0.7576, lr_0 = 3.3061e-04
Loss = 6.8417e-02, PNorm = 66.0411, GNorm = 1.0327, lr_0 = 3.3038e-04
Loss = 6.7133e-02, PNorm = 66.0492, GNorm = 0.5990, lr_0 = 3.3016e-04
Loss = 6.7326e-02, PNorm = 66.0566, GNorm = 0.5693, lr_0 = 3.2993e-04
Loss = 7.3135e-02, PNorm = 66.0638, GNorm = 0.7201, lr_0 = 3.2970e-04
Loss = 5.7809e-02, PNorm = 66.0729, GNorm = 0.5876, lr_0 = 3.2948e-04
Loss = 5.9111e-02, PNorm = 66.0808, GNorm = 0.7966, lr_0 = 3.2925e-04
Loss = 6.5343e-02, PNorm = 66.0853, GNorm = 0.9951, lr_0 = 3.2903e-04
Loss = 6.0790e-02, PNorm = 66.0909, GNorm = 0.5809, lr_0 = 3.2880e-04
Loss = 8.2092e-02, PNorm = 66.0971, GNorm = 0.4948, lr_0 = 3.2858e-04
Loss = 6.8222e-02, PNorm = 66.1049, GNorm = 0.8298, lr_0 = 3.2835e-04
Loss = 6.9646e-02, PNorm = 66.1140, GNorm = 0.6207, lr_0 = 3.2813e-04
Loss = 7.0369e-02, PNorm = 66.1224, GNorm = 0.6428, lr_0 = 3.2790e-04
Loss = 6.3189e-02, PNorm = 66.1259, GNorm = 0.7933, lr_0 = 3.2768e-04
Loss = 7.0972e-02, PNorm = 66.1290, GNorm = 0.4636, lr_0 = 3.2745e-04
Loss = 6.2955e-02, PNorm = 66.1382, GNorm = 0.6070, lr_0 = 3.2723e-04
Loss = 6.8099e-02, PNorm = 66.1471, GNorm = 0.9567, lr_0 = 3.2700e-04
Loss = 5.6620e-02, PNorm = 66.1528, GNorm = 0.5496, lr_0 = 3.2678e-04
Loss = 7.0375e-02, PNorm = 66.1601, GNorm = 0.5629, lr_0 = 3.2656e-04
Loss = 7.0923e-02, PNorm = 66.1714, GNorm = 0.7340, lr_0 = 3.2633e-04
Loss = 6.3594e-02, PNorm = 66.1816, GNorm = 0.8247, lr_0 = 3.2611e-04
Loss = 6.2692e-02, PNorm = 66.1871, GNorm = 0.5447, lr_0 = 3.2589e-04
Loss = 6.2478e-02, PNorm = 66.1918, GNorm = 0.4883, lr_0 = 3.2566e-04
Loss = 6.1015e-02, PNorm = 66.1959, GNorm = 0.6301, lr_0 = 3.2544e-04
Loss = 7.3532e-02, PNorm = 66.2031, GNorm = 0.5334, lr_0 = 3.2522e-04
Loss = 7.2989e-02, PNorm = 66.2078, GNorm = 1.1349, lr_0 = 3.2499e-04
Loss = 7.7785e-02, PNorm = 66.2156, GNorm = 0.6621, lr_0 = 3.2477e-04
Loss = 6.8148e-02, PNorm = 66.2227, GNorm = 0.5657, lr_0 = 3.2455e-04
Loss = 5.8642e-02, PNorm = 66.2305, GNorm = 0.6929, lr_0 = 3.2433e-04
Loss = 6.5475e-02, PNorm = 66.2356, GNorm = 0.6474, lr_0 = 3.2410e-04
Loss = 7.0121e-02, PNorm = 66.2413, GNorm = 0.6676, lr_0 = 3.2388e-04
Loss = 6.1528e-02, PNorm = 66.2477, GNorm = 0.5724, lr_0 = 3.2366e-04
Loss = 5.9580e-02, PNorm = 66.2537, GNorm = 0.5238, lr_0 = 3.2344e-04
Loss = 7.0871e-02, PNorm = 66.2594, GNorm = 0.7415, lr_0 = 3.2322e-04
Loss = 6.2248e-02, PNorm = 66.2665, GNorm = 0.5294, lr_0 = 3.2300e-04
Loss = 6.5326e-02, PNorm = 66.2729, GNorm = 0.7254, lr_0 = 3.2277e-04
Loss = 6.8441e-02, PNorm = 66.2807, GNorm = 0.6769, lr_0 = 3.2255e-04
Loss = 6.6501e-02, PNorm = 66.2871, GNorm = 0.6658, lr_0 = 3.2233e-04
Loss = 6.5249e-02, PNorm = 66.2929, GNorm = 0.5551, lr_0 = 3.2211e-04
Loss = 6.9637e-02, PNorm = 66.3004, GNorm = 0.4403, lr_0 = 3.2189e-04
Loss = 6.8311e-02, PNorm = 66.3060, GNorm = 0.7598, lr_0 = 3.2167e-04
Loss = 5.9733e-02, PNorm = 66.3132, GNorm = 0.9155, lr_0 = 3.2145e-04
Loss = 6.9718e-02, PNorm = 66.3187, GNorm = 0.5599, lr_0 = 3.2123e-04
Loss = 5.9258e-02, PNorm = 66.3240, GNorm = 0.6912, lr_0 = 3.2101e-04
Loss = 5.8570e-02, PNorm = 66.3321, GNorm = 0.3831, lr_0 = 3.2079e-04
Loss = 6.5617e-02, PNorm = 66.3404, GNorm = 0.6371, lr_0 = 3.2057e-04
Loss = 5.5062e-02, PNorm = 66.3462, GNorm = 0.4721, lr_0 = 3.2035e-04
Loss = 6.1980e-02, PNorm = 66.3493, GNorm = 0.4415, lr_0 = 3.2013e-04
Loss = 7.1694e-02, PNorm = 66.3558, GNorm = 0.6491, lr_0 = 3.1991e-04
Loss = 6.6609e-02, PNorm = 66.3643, GNorm = 0.5271, lr_0 = 3.1969e-04
Loss = 6.6602e-02, PNorm = 66.3682, GNorm = 0.7673, lr_0 = 3.1947e-04
Loss = 6.2832e-02, PNorm = 66.3731, GNorm = 0.5450, lr_0 = 3.1925e-04
Loss = 6.9020e-02, PNorm = 66.3789, GNorm = 0.9377, lr_0 = 3.1904e-04
Loss = 6.9769e-02, PNorm = 66.3851, GNorm = 0.4379, lr_0 = 3.1882e-04
Loss = 6.4284e-02, PNorm = 66.3925, GNorm = 0.6027, lr_0 = 3.1860e-04
Loss = 7.2560e-02, PNorm = 66.3972, GNorm = 1.0929, lr_0 = 3.1838e-04
Loss = 6.7509e-02, PNorm = 66.4062, GNorm = 0.4960, lr_0 = 3.1816e-04
Loss = 6.7963e-02, PNorm = 66.4139, GNorm = 0.4133, lr_0 = 3.1794e-04
Loss = 6.1310e-02, PNorm = 66.4181, GNorm = 0.5588, lr_0 = 3.1773e-04
Loss = 6.4607e-02, PNorm = 66.4243, GNorm = 0.6257, lr_0 = 3.1751e-04
Loss = 7.2469e-02, PNorm = 66.4259, GNorm = 1.1543, lr_0 = 3.1729e-04
Loss = 6.9955e-02, PNorm = 66.4296, GNorm = 0.5706, lr_0 = 3.1707e-04
Loss = 6.0486e-02, PNorm = 66.4338, GNorm = 0.5665, lr_0 = 3.1686e-04
Loss = 6.8418e-02, PNorm = 66.4397, GNorm = 0.5401, lr_0 = 3.1664e-04
Loss = 6.3782e-02, PNorm = 66.4459, GNorm = 0.4604, lr_0 = 3.1642e-04
Loss = 6.7120e-02, PNorm = 66.4500, GNorm = 0.5530, lr_0 = 3.1621e-04
Validation mae = 0.388459
Epoch 16
Loss = 5.3767e-02, PNorm = 66.4541, GNorm = 0.4824, lr_0 = 3.1599e-04
Loss = 5.9601e-02, PNorm = 66.4621, GNorm = 0.4957, lr_0 = 3.1577e-04
Loss = 5.7103e-02, PNorm = 66.4706, GNorm = 0.5011, lr_0 = 3.1556e-04
Loss = 6.4891e-02, PNorm = 66.4786, GNorm = 0.5174, lr_0 = 3.1534e-04
Loss = 5.5725e-02, PNorm = 66.4846, GNorm = 0.6095, lr_0 = 3.1512e-04
Loss = 4.7207e-02, PNorm = 66.4928, GNorm = 0.6597, lr_0 = 3.1491e-04
Loss = 5.1841e-02, PNorm = 66.4983, GNorm = 0.6735, lr_0 = 3.1469e-04
Loss = 5.7017e-02, PNorm = 66.5015, GNorm = 0.5163, lr_0 = 3.1448e-04
Loss = 6.2471e-02, PNorm = 66.5086, GNorm = 0.4604, lr_0 = 3.1426e-04
Loss = 5.8300e-02, PNorm = 66.5171, GNorm = 0.8722, lr_0 = 3.1405e-04
Loss = 5.9944e-02, PNorm = 66.5243, GNorm = 0.6382, lr_0 = 3.1383e-04
Loss = 8.2933e-02, PNorm = 66.5316, GNorm = 0.7315, lr_0 = 3.1362e-04
Loss = 5.5304e-02, PNorm = 66.5404, GNorm = 0.7426, lr_0 = 3.1340e-04
Loss = 6.4108e-02, PNorm = 66.5504, GNorm = 0.4768, lr_0 = 3.1319e-04
Loss = 5.7110e-02, PNorm = 66.5565, GNorm = 0.5128, lr_0 = 3.1297e-04
Loss = 4.3228e-02, PNorm = 66.5619, GNorm = 0.4484, lr_0 = 3.1276e-04
Loss = 5.0529e-02, PNorm = 66.5683, GNorm = 0.4565, lr_0 = 3.1254e-04
Loss = 5.5344e-02, PNorm = 66.5736, GNorm = 0.4890, lr_0 = 3.1233e-04
Loss = 5.1491e-02, PNorm = 66.5781, GNorm = 0.9392, lr_0 = 3.1212e-04
Loss = 6.2773e-02, PNorm = 66.5836, GNorm = 0.7253, lr_0 = 3.1190e-04
Loss = 5.7915e-02, PNorm = 66.5904, GNorm = 0.9397, lr_0 = 3.1169e-04
Loss = 5.9465e-02, PNorm = 66.5974, GNorm = 0.5581, lr_0 = 3.1147e-04
Loss = 5.8231e-02, PNorm = 66.6016, GNorm = 0.5252, lr_0 = 3.1126e-04
Loss = 6.3104e-02, PNorm = 66.6066, GNorm = 1.1743, lr_0 = 3.1105e-04
Loss = 5.5636e-02, PNorm = 66.6135, GNorm = 0.5151, lr_0 = 3.1083e-04
Loss = 5.5543e-02, PNorm = 66.6203, GNorm = 0.9036, lr_0 = 3.1062e-04
Loss = 6.3121e-02, PNorm = 66.6284, GNorm = 1.1166, lr_0 = 3.1041e-04
Loss = 6.3055e-02, PNorm = 66.6359, GNorm = 0.5937, lr_0 = 3.1020e-04
Loss = 5.9681e-02, PNorm = 66.6433, GNorm = 0.6697, lr_0 = 3.0998e-04
Loss = 7.0706e-02, PNorm = 66.6490, GNorm = 0.7202, lr_0 = 3.0977e-04
Loss = 5.4806e-02, PNorm = 66.6554, GNorm = 0.4624, lr_0 = 3.0956e-04
Loss = 7.2732e-02, PNorm = 66.6620, GNorm = 0.5231, lr_0 = 3.0935e-04
Loss = 6.0251e-02, PNorm = 66.6696, GNorm = 0.6414, lr_0 = 3.0914e-04
Loss = 5.4040e-02, PNorm = 66.6748, GNorm = 0.4952, lr_0 = 3.0892e-04
Loss = 5.9686e-02, PNorm = 66.6778, GNorm = 0.7688, lr_0 = 3.0871e-04
Loss = 6.3533e-02, PNorm = 66.6830, GNorm = 0.6203, lr_0 = 3.0850e-04
Loss = 6.7157e-02, PNorm = 66.6892, GNorm = 1.1605, lr_0 = 3.0829e-04
Loss = 5.1439e-02, PNorm = 66.6943, GNorm = 0.4182, lr_0 = 3.0808e-04
Loss = 5.7171e-02, PNorm = 66.6994, GNorm = 0.4694, lr_0 = 3.0787e-04
Loss = 5.8159e-02, PNorm = 66.7026, GNorm = 0.5881, lr_0 = 3.0766e-04
Loss = 5.3811e-02, PNorm = 66.7060, GNorm = 0.7030, lr_0 = 3.0745e-04
Loss = 6.0740e-02, PNorm = 66.7103, GNorm = 0.7202, lr_0 = 3.0723e-04
Loss = 6.1733e-02, PNorm = 66.7167, GNorm = 0.5558, lr_0 = 3.0702e-04
Loss = 6.2891e-02, PNorm = 66.7247, GNorm = 0.6328, lr_0 = 3.0681e-04
Loss = 5.7179e-02, PNorm = 66.7297, GNorm = 0.5695, lr_0 = 3.0660e-04
Loss = 5.6767e-02, PNorm = 66.7354, GNorm = 0.5143, lr_0 = 3.0639e-04
Loss = 5.3987e-02, PNorm = 66.7394, GNorm = 0.4649, lr_0 = 3.0618e-04
Loss = 5.7541e-02, PNorm = 66.7429, GNorm = 0.6726, lr_0 = 3.0597e-04
Loss = 6.3666e-02, PNorm = 66.7492, GNorm = 0.6447, lr_0 = 3.0576e-04
Loss = 6.4677e-02, PNorm = 66.7576, GNorm = 0.5341, lr_0 = 3.0555e-04
Loss = 6.1882e-02, PNorm = 66.7624, GNorm = 0.5535, lr_0 = 3.0535e-04
Loss = 5.3762e-02, PNorm = 66.7661, GNorm = 0.7672, lr_0 = 3.0514e-04
Loss = 6.0211e-02, PNorm = 66.7723, GNorm = 0.4625, lr_0 = 3.0493e-04
Loss = 5.7380e-02, PNorm = 66.7785, GNorm = 0.6618, lr_0 = 3.0472e-04
Loss = 6.5989e-02, PNorm = 66.7851, GNorm = 0.4119, lr_0 = 3.0451e-04
Loss = 6.3929e-02, PNorm = 66.7936, GNorm = 0.6061, lr_0 = 3.0430e-04
Loss = 6.3510e-02, PNorm = 66.8029, GNorm = 0.6542, lr_0 = 3.0409e-04
Loss = 7.2542e-02, PNorm = 66.8103, GNorm = 0.5902, lr_0 = 3.0388e-04
Loss = 7.1076e-02, PNorm = 66.8140, GNorm = 0.5492, lr_0 = 3.0368e-04
Loss = 5.7105e-02, PNorm = 66.8211, GNorm = 0.5030, lr_0 = 3.0347e-04
Loss = 5.6619e-02, PNorm = 66.8282, GNorm = 0.9435, lr_0 = 3.0326e-04
Loss = 6.5192e-02, PNorm = 66.8337, GNorm = 0.6261, lr_0 = 3.0305e-04
Loss = 6.2496e-02, PNorm = 66.8395, GNorm = 0.7690, lr_0 = 3.0284e-04
Loss = 5.6017e-02, PNorm = 66.8483, GNorm = 0.4674, lr_0 = 3.0264e-04
Loss = 6.4279e-02, PNorm = 66.8552, GNorm = 0.7708, lr_0 = 3.0243e-04
Loss = 5.9082e-02, PNorm = 66.8593, GNorm = 0.6362, lr_0 = 3.0222e-04
Loss = 5.8942e-02, PNorm = 66.8653, GNorm = 0.7286, lr_0 = 3.0202e-04
Loss = 6.6984e-02, PNorm = 66.8710, GNorm = 0.6758, lr_0 = 3.0181e-04
Loss = 6.9389e-02, PNorm = 66.8778, GNorm = 0.5594, lr_0 = 3.0160e-04
Loss = 6.2197e-02, PNorm = 66.8840, GNorm = 0.6771, lr_0 = 3.0140e-04
Loss = 6.3148e-02, PNorm = 66.8900, GNorm = 0.4796, lr_0 = 3.0119e-04
Loss = 6.2710e-02, PNorm = 66.8952, GNorm = 0.6214, lr_0 = 3.0098e-04
Loss = 5.1848e-02, PNorm = 66.8999, GNorm = 0.5678, lr_0 = 3.0078e-04
Loss = 6.6089e-02, PNorm = 66.9074, GNorm = 0.7422, lr_0 = 3.0057e-04
Loss = 6.2269e-02, PNorm = 66.9141, GNorm = 0.4356, lr_0 = 3.0036e-04
Loss = 6.3545e-02, PNorm = 66.9200, GNorm = 1.0453, lr_0 = 3.0016e-04
Loss = 6.4064e-02, PNorm = 66.9253, GNorm = 0.5469, lr_0 = 2.9995e-04
Loss = 5.9849e-02, PNorm = 66.9337, GNorm = 0.6223, lr_0 = 2.9975e-04
Loss = 6.0679e-02, PNorm = 66.9417, GNorm = 0.4790, lr_0 = 2.9954e-04
Loss = 6.4118e-02, PNorm = 66.9470, GNorm = 0.5512, lr_0 = 2.9934e-04
Loss = 6.0855e-02, PNorm = 66.9520, GNorm = 0.5344, lr_0 = 2.9913e-04
Loss = 6.6535e-02, PNorm = 66.9539, GNorm = 0.5696, lr_0 = 2.9893e-04
Loss = 6.7513e-02, PNorm = 66.9553, GNorm = 0.5791, lr_0 = 2.9872e-04
Loss = 5.4567e-02, PNorm = 66.9581, GNorm = 0.6739, lr_0 = 2.9852e-04
Loss = 6.5367e-02, PNorm = 66.9652, GNorm = 0.4199, lr_0 = 2.9831e-04
Loss = 5.9711e-02, PNorm = 66.9717, GNorm = 0.6203, lr_0 = 2.9811e-04
Loss = 5.9794e-02, PNorm = 66.9755, GNorm = 0.6152, lr_0 = 2.9790e-04
Loss = 6.0616e-02, PNorm = 66.9817, GNorm = 0.4960, lr_0 = 2.9770e-04
Loss = 5.9724e-02, PNorm = 66.9867, GNorm = 0.7865, lr_0 = 2.9750e-04
Loss = 6.6709e-02, PNorm = 66.9916, GNorm = 0.7946, lr_0 = 2.9729e-04
Loss = 6.9788e-02, PNorm = 66.9981, GNorm = 0.9504, lr_0 = 2.9709e-04
Loss = 6.3254e-02, PNorm = 67.0060, GNorm = 0.4580, lr_0 = 2.9689e-04
Loss = 5.7162e-02, PNorm = 67.0117, GNorm = 0.5826, lr_0 = 2.9668e-04
Loss = 6.2262e-02, PNorm = 67.0144, GNorm = 0.4709, lr_0 = 2.9648e-04
Loss = 6.4143e-02, PNorm = 67.0215, GNorm = 0.4701, lr_0 = 2.9628e-04
Loss = 6.5816e-02, PNorm = 67.0300, GNorm = 0.5822, lr_0 = 2.9607e-04
Loss = 6.0415e-02, PNorm = 67.0371, GNorm = 0.5693, lr_0 = 2.9587e-04
Loss = 7.4140e-02, PNorm = 67.0458, GNorm = 0.7937, lr_0 = 2.9567e-04
Loss = 5.9630e-02, PNorm = 67.0516, GNorm = 0.5300, lr_0 = 2.9546e-04
Loss = 5.3955e-02, PNorm = 67.0557, GNorm = 0.4062, lr_0 = 2.9526e-04
Loss = 5.9743e-02, PNorm = 67.0599, GNorm = 0.6379, lr_0 = 2.9506e-04
Loss = 6.1805e-02, PNorm = 67.0664, GNorm = 0.5552, lr_0 = 2.9486e-04
Loss = 6.6265e-02, PNorm = 67.0729, GNorm = 0.6340, lr_0 = 2.9466e-04
Loss = 7.0658e-02, PNorm = 67.0789, GNorm = 0.5608, lr_0 = 2.9445e-04
Loss = 6.0450e-02, PNorm = 67.0840, GNorm = 0.5201, lr_0 = 2.9425e-04
Loss = 6.2055e-02, PNorm = 67.0893, GNorm = 0.4025, lr_0 = 2.9405e-04
Loss = 5.4082e-02, PNorm = 67.0954, GNorm = 0.4434, lr_0 = 2.9385e-04
Loss = 6.0511e-02, PNorm = 67.0982, GNorm = 0.7714, lr_0 = 2.9365e-04
Loss = 6.2787e-02, PNorm = 67.1042, GNorm = 0.6976, lr_0 = 2.9345e-04
Loss = 6.0629e-02, PNorm = 67.1127, GNorm = 0.5268, lr_0 = 2.9325e-04
Loss = 6.3593e-02, PNorm = 67.1200, GNorm = 0.5726, lr_0 = 2.9305e-04
Loss = 6.8056e-02, PNorm = 67.1272, GNorm = 0.4756, lr_0 = 2.9284e-04
Loss = 6.4633e-02, PNorm = 67.1302, GNorm = 0.7016, lr_0 = 2.9264e-04
Loss = 5.1689e-02, PNorm = 67.1333, GNorm = 0.7501, lr_0 = 2.9244e-04
Loss = 7.0350e-02, PNorm = 67.1340, GNorm = 0.6520, lr_0 = 2.9224e-04
Loss = 5.7394e-02, PNorm = 67.1393, GNorm = 0.6621, lr_0 = 2.9204e-04
Loss = 5.8959e-02, PNorm = 67.1439, GNorm = 0.8866, lr_0 = 2.9184e-04
Loss = 6.1322e-02, PNorm = 67.1507, GNorm = 0.8201, lr_0 = 2.9164e-04
Loss = 6.7461e-02, PNorm = 67.1566, GNorm = 0.4689, lr_0 = 2.9144e-04
Loss = 5.4451e-02, PNorm = 67.1592, GNorm = 0.7355, lr_0 = 2.9124e-04
Validation mae = 0.388491
Epoch 17
Loss = 5.8537e-02, PNorm = 67.1618, GNorm = 0.5684, lr_0 = 2.9104e-04
Loss = 5.2274e-02, PNorm = 67.1671, GNorm = 0.4344, lr_0 = 2.9084e-04
Loss = 5.4906e-02, PNorm = 67.1721, GNorm = 0.6137, lr_0 = 2.9065e-04
Loss = 4.5911e-02, PNorm = 67.1797, GNorm = 0.3612, lr_0 = 2.9045e-04
Loss = 5.5545e-02, PNorm = 67.1867, GNorm = 0.7741, lr_0 = 2.9025e-04
Loss = 4.4893e-02, PNorm = 67.1897, GNorm = 0.4007, lr_0 = 2.9005e-04
Loss = 5.4604e-02, PNorm = 67.1931, GNorm = 0.4159, lr_0 = 2.8985e-04
Loss = 5.1042e-02, PNorm = 67.2000, GNorm = 0.6050, lr_0 = 2.8965e-04
Loss = 5.5429e-02, PNorm = 67.2052, GNorm = 0.4163, lr_0 = 2.8945e-04
Loss = 4.9737e-02, PNorm = 67.2094, GNorm = 0.8855, lr_0 = 2.8925e-04
Loss = 4.8485e-02, PNorm = 67.2132, GNorm = 0.3780, lr_0 = 2.8906e-04
Loss = 5.2901e-02, PNorm = 67.2191, GNorm = 0.5570, lr_0 = 2.8886e-04
Loss = 5.9064e-02, PNorm = 67.2241, GNorm = 0.5504, lr_0 = 2.8866e-04
Loss = 5.9187e-02, PNorm = 67.2301, GNorm = 0.7158, lr_0 = 2.8846e-04
Loss = 6.5807e-02, PNorm = 67.2381, GNorm = 0.5556, lr_0 = 2.8826e-04
Loss = 4.9207e-02, PNorm = 67.2427, GNorm = 0.7357, lr_0 = 2.8807e-04
Loss = 5.7126e-02, PNorm = 67.2471, GNorm = 0.5353, lr_0 = 2.8787e-04
Loss = 6.1625e-02, PNorm = 67.2534, GNorm = 0.6623, lr_0 = 2.8767e-04
Loss = 5.2846e-02, PNorm = 67.2598, GNorm = 0.8424, lr_0 = 2.8748e-04
Loss = 6.3888e-02, PNorm = 67.2664, GNorm = 0.5366, lr_0 = 2.8728e-04
Loss = 5.9226e-02, PNorm = 67.2726, GNorm = 0.4910, lr_0 = 2.8708e-04
Loss = 5.1136e-02, PNorm = 67.2789, GNorm = 0.4149, lr_0 = 2.8689e-04
Loss = 6.0319e-02, PNorm = 67.2847, GNorm = 0.7736, lr_0 = 2.8669e-04
Loss = 5.7651e-02, PNorm = 67.2870, GNorm = 0.4869, lr_0 = 2.8649e-04
Loss = 5.7650e-02, PNorm = 67.2906, GNorm = 0.5091, lr_0 = 2.8630e-04
Loss = 4.4652e-02, PNorm = 67.2942, GNorm = 0.3562, lr_0 = 2.8610e-04
Loss = 5.7579e-02, PNorm = 67.3005, GNorm = 0.4001, lr_0 = 2.8590e-04
Loss = 5.2573e-02, PNorm = 67.3081, GNorm = 0.4934, lr_0 = 2.8571e-04
Loss = 5.4921e-02, PNorm = 67.3148, GNorm = 0.5415, lr_0 = 2.8551e-04
Loss = 5.1598e-02, PNorm = 67.3193, GNorm = 0.5511, lr_0 = 2.8532e-04
Loss = 6.1020e-02, PNorm = 67.3244, GNorm = 0.5345, lr_0 = 2.8512e-04
Loss = 5.9784e-02, PNorm = 67.3301, GNorm = 0.6522, lr_0 = 2.8493e-04
Loss = 5.1905e-02, PNorm = 67.3364, GNorm = 0.8057, lr_0 = 2.8473e-04
Loss = 6.3530e-02, PNorm = 67.3418, GNorm = 0.6682, lr_0 = 2.8454e-04
Loss = 5.4799e-02, PNorm = 67.3484, GNorm = 0.4947, lr_0 = 2.8434e-04
Loss = 5.5008e-02, PNorm = 67.3562, GNorm = 0.6112, lr_0 = 2.8415e-04
Loss = 5.1939e-02, PNorm = 67.3624, GNorm = 0.7854, lr_0 = 2.8395e-04
Loss = 5.2945e-02, PNorm = 67.3668, GNorm = 0.5366, lr_0 = 2.8376e-04
Loss = 5.0330e-02, PNorm = 67.3709, GNorm = 0.7920, lr_0 = 2.8356e-04
Loss = 6.9040e-02, PNorm = 67.3751, GNorm = 0.5259, lr_0 = 2.8337e-04
Loss = 5.6869e-02, PNorm = 67.3809, GNorm = 0.8503, lr_0 = 2.8317e-04
Loss = 5.4019e-02, PNorm = 67.3867, GNorm = 0.6228, lr_0 = 2.8298e-04
Loss = 6.1029e-02, PNorm = 67.3925, GNorm = 0.5875, lr_0 = 2.8279e-04
Loss = 5.2487e-02, PNorm = 67.3987, GNorm = 0.5402, lr_0 = 2.8259e-04
Loss = 6.3055e-02, PNorm = 67.4066, GNorm = 0.4970, lr_0 = 2.8240e-04
Loss = 5.3714e-02, PNorm = 67.4125, GNorm = 0.7370, lr_0 = 2.8221e-04
Loss = 4.6214e-02, PNorm = 67.4169, GNorm = 0.4284, lr_0 = 2.8201e-04
Loss = 5.9534e-02, PNorm = 67.4214, GNorm = 0.4632, lr_0 = 2.8182e-04
Loss = 5.0139e-02, PNorm = 67.4269, GNorm = 0.4559, lr_0 = 2.8163e-04
Loss = 6.2090e-02, PNorm = 67.4326, GNorm = 0.7894, lr_0 = 2.8143e-04
Loss = 5.9162e-02, PNorm = 67.4371, GNorm = 0.4132, lr_0 = 2.8124e-04
Loss = 5.4931e-02, PNorm = 67.4431, GNorm = 0.4360, lr_0 = 2.8105e-04
Loss = 5.0402e-02, PNorm = 67.4474, GNorm = 0.5330, lr_0 = 2.8085e-04
Loss = 6.2295e-02, PNorm = 67.4516, GNorm = 0.8531, lr_0 = 2.8066e-04
Loss = 5.2286e-02, PNorm = 67.4529, GNorm = 0.5129, lr_0 = 2.8047e-04
Loss = 6.1394e-02, PNorm = 67.4557, GNorm = 0.5055, lr_0 = 2.8028e-04
Loss = 6.4587e-02, PNorm = 67.4600, GNorm = 0.4876, lr_0 = 2.8009e-04
Loss = 5.8141e-02, PNorm = 67.4649, GNorm = 0.3833, lr_0 = 2.7989e-04
Loss = 5.5871e-02, PNorm = 67.4722, GNorm = 0.5974, lr_0 = 2.7970e-04
Loss = 5.4995e-02, PNorm = 67.4795, GNorm = 0.4692, lr_0 = 2.7951e-04
Loss = 5.4657e-02, PNorm = 67.4864, GNorm = 0.6262, lr_0 = 2.7932e-04
Loss = 5.6614e-02, PNorm = 67.4916, GNorm = 0.4575, lr_0 = 2.7913e-04
Loss = 5.2948e-02, PNorm = 67.4957, GNorm = 0.5803, lr_0 = 2.7894e-04
Loss = 5.3341e-02, PNorm = 67.5003, GNorm = 0.7239, lr_0 = 2.7875e-04
Loss = 5.5962e-02, PNorm = 67.5061, GNorm = 0.6155, lr_0 = 2.7855e-04
Loss = 5.5988e-02, PNorm = 67.5097, GNorm = 0.4816, lr_0 = 2.7836e-04
Loss = 5.1983e-02, PNorm = 67.5135, GNorm = 0.5220, lr_0 = 2.7817e-04
Loss = 5.7939e-02, PNorm = 67.5191, GNorm = 0.5679, lr_0 = 2.7798e-04
Loss = 5.6358e-02, PNorm = 67.5237, GNorm = 0.7056, lr_0 = 2.7779e-04
Loss = 5.2341e-02, PNorm = 67.5262, GNorm = 0.5041, lr_0 = 2.7760e-04
Loss = 6.4294e-02, PNorm = 67.5298, GNorm = 0.6039, lr_0 = 2.7741e-04
Loss = 6.5046e-02, PNorm = 67.5359, GNorm = 0.7027, lr_0 = 2.7722e-04
Loss = 5.8381e-02, PNorm = 67.5411, GNorm = 0.7630, lr_0 = 2.7703e-04
Loss = 5.3823e-02, PNorm = 67.5491, GNorm = 0.5610, lr_0 = 2.7684e-04
Loss = 5.7680e-02, PNorm = 67.5550, GNorm = 0.5099, lr_0 = 2.7665e-04
Loss = 5.1409e-02, PNorm = 67.5583, GNorm = 0.6361, lr_0 = 2.7646e-04
Loss = 5.1154e-02, PNorm = 67.5621, GNorm = 0.5084, lr_0 = 2.7627e-04
Loss = 5.2974e-02, PNorm = 67.5677, GNorm = 0.7558, lr_0 = 2.7608e-04
Loss = 5.5704e-02, PNorm = 67.5727, GNorm = 0.6830, lr_0 = 2.7590e-04
Loss = 5.7388e-02, PNorm = 67.5785, GNorm = 0.6841, lr_0 = 2.7571e-04
Loss = 5.7771e-02, PNorm = 67.5844, GNorm = 0.5353, lr_0 = 2.7552e-04
Loss = 6.0153e-02, PNorm = 67.5907, GNorm = 0.5375, lr_0 = 2.7533e-04
Loss = 5.5084e-02, PNorm = 67.5959, GNorm = 0.4960, lr_0 = 2.7514e-04
Loss = 5.7560e-02, PNorm = 67.6016, GNorm = 0.6537, lr_0 = 2.7495e-04
Loss = 5.6098e-02, PNorm = 67.6058, GNorm = 0.9325, lr_0 = 2.7476e-04
Loss = 5.8000e-02, PNorm = 67.6098, GNorm = 0.6338, lr_0 = 2.7457e-04
Loss = 5.3159e-02, PNorm = 67.6144, GNorm = 0.4349, lr_0 = 2.7439e-04
Loss = 6.0571e-02, PNorm = 67.6173, GNorm = 0.8321, lr_0 = 2.7420e-04
Loss = 5.6374e-02, PNorm = 67.6225, GNorm = 0.5627, lr_0 = 2.7401e-04
Loss = 5.1891e-02, PNorm = 67.6270, GNorm = 0.6222, lr_0 = 2.7382e-04
Loss = 5.2242e-02, PNorm = 67.6331, GNorm = 0.5402, lr_0 = 2.7364e-04
Loss = 6.3756e-02, PNorm = 67.6381, GNorm = 0.9393, lr_0 = 2.7345e-04
Loss = 5.8953e-02, PNorm = 67.6446, GNorm = 0.6793, lr_0 = 2.7326e-04
Loss = 7.0167e-02, PNorm = 67.6485, GNorm = 0.6768, lr_0 = 2.7307e-04
Loss = 6.7505e-02, PNorm = 67.6541, GNorm = 0.6517, lr_0 = 2.7289e-04
Loss = 6.1211e-02, PNorm = 67.6599, GNorm = 0.8684, lr_0 = 2.7270e-04
Loss = 5.7426e-02, PNorm = 67.6669, GNorm = 0.5421, lr_0 = 2.7251e-04
Loss = 6.7195e-02, PNorm = 67.6737, GNorm = 0.7788, lr_0 = 2.7233e-04
Loss = 6.2934e-02, PNorm = 67.6825, GNorm = 0.4390, lr_0 = 2.7214e-04
Loss = 4.6660e-02, PNorm = 67.6882, GNorm = 0.4283, lr_0 = 2.7195e-04
Loss = 6.0043e-02, PNorm = 67.6909, GNorm = 0.6866, lr_0 = 2.7177e-04
Loss = 5.5610e-02, PNorm = 67.6951, GNorm = 0.7329, lr_0 = 2.7158e-04
Loss = 6.2199e-02, PNorm = 67.6988, GNorm = 0.4999, lr_0 = 2.7139e-04
Loss = 5.2604e-02, PNorm = 67.7023, GNorm = 0.6905, lr_0 = 2.7121e-04
Loss = 5.2852e-02, PNorm = 67.7062, GNorm = 0.7703, lr_0 = 2.7102e-04
Loss = 5.5196e-02, PNorm = 67.7115, GNorm = 0.8160, lr_0 = 2.7084e-04
Loss = 7.6551e-02, PNorm = 67.7165, GNorm = 0.6815, lr_0 = 2.7065e-04
Loss = 5.9301e-02, PNorm = 67.7222, GNorm = 0.5285, lr_0 = 2.7047e-04
Loss = 7.0625e-02, PNorm = 67.7266, GNorm = 0.6123, lr_0 = 2.7028e-04
Loss = 5.3967e-02, PNorm = 67.7307, GNorm = 0.6008, lr_0 = 2.7010e-04
Loss = 4.7111e-02, PNorm = 67.7345, GNorm = 0.4673, lr_0 = 2.6991e-04
Loss = 5.9805e-02, PNorm = 67.7382, GNorm = 0.4837, lr_0 = 2.6973e-04
Loss = 5.6615e-02, PNorm = 67.7401, GNorm = 0.4684, lr_0 = 2.6954e-04
Loss = 6.2215e-02, PNorm = 67.7450, GNorm = 0.3997, lr_0 = 2.6936e-04
Loss = 6.7552e-02, PNorm = 67.7503, GNorm = 0.8495, lr_0 = 2.6917e-04
Loss = 5.7660e-02, PNorm = 67.7561, GNorm = 0.4978, lr_0 = 2.6899e-04
Loss = 6.0965e-02, PNorm = 67.7616, GNorm = 0.6308, lr_0 = 2.6880e-04
Loss = 7.1097e-02, PNorm = 67.7675, GNorm = 0.5518, lr_0 = 2.6862e-04
Loss = 6.0850e-02, PNorm = 67.7709, GNorm = 0.6698, lr_0 = 2.6844e-04
Loss = 5.2682e-02, PNorm = 67.7755, GNorm = 0.5738, lr_0 = 2.6825e-04
Validation mae = 0.386821
Epoch 18
Loss = 5.7552e-02, PNorm = 67.7816, GNorm = 0.4279, lr_0 = 2.6807e-04
Loss = 4.6734e-02, PNorm = 67.7877, GNorm = 0.8147, lr_0 = 2.6788e-04
Loss = 5.9093e-02, PNorm = 67.7933, GNorm = 0.6303, lr_0 = 2.6770e-04
Loss = 4.9201e-02, PNorm = 67.8005, GNorm = 0.4318, lr_0 = 2.6752e-04
Loss = 4.3613e-02, PNorm = 67.8053, GNorm = 0.4629, lr_0 = 2.6733e-04
Loss = 4.8077e-02, PNorm = 67.8094, GNorm = 0.5868, lr_0 = 2.6715e-04
Loss = 4.6969e-02, PNorm = 67.8147, GNorm = 0.4972, lr_0 = 2.6697e-04
Loss = 4.9258e-02, PNorm = 67.8195, GNorm = 0.4892, lr_0 = 2.6678e-04
Loss = 4.8749e-02, PNorm = 67.8235, GNorm = 0.4948, lr_0 = 2.6660e-04
Loss = 4.2742e-02, PNorm = 67.8274, GNorm = 0.3879, lr_0 = 2.6642e-04
Loss = 4.6276e-02, PNorm = 67.8302, GNorm = 0.4927, lr_0 = 2.6624e-04
Loss = 5.2309e-02, PNorm = 67.8332, GNorm = 0.6505, lr_0 = 2.6605e-04
Loss = 6.2563e-02, PNorm = 67.8390, GNorm = 0.5210, lr_0 = 2.6587e-04
Loss = 4.5205e-02, PNorm = 67.8461, GNorm = 0.5993, lr_0 = 2.6569e-04
Loss = 6.3868e-02, PNorm = 67.8517, GNorm = 0.5232, lr_0 = 2.6551e-04
Loss = 4.6794e-02, PNorm = 67.8552, GNorm = 0.4339, lr_0 = 2.6533e-04
Loss = 4.9256e-02, PNorm = 67.8595, GNorm = 0.7819, lr_0 = 2.6514e-04
Loss = 5.4197e-02, PNorm = 67.8670, GNorm = 0.6875, lr_0 = 2.6496e-04
Loss = 4.9198e-02, PNorm = 67.8726, GNorm = 0.4861, lr_0 = 2.6478e-04
Loss = 4.5338e-02, PNorm = 67.8770, GNorm = 0.5124, lr_0 = 2.6460e-04
Loss = 5.3821e-02, PNorm = 67.8788, GNorm = 0.6075, lr_0 = 2.6442e-04
Loss = 5.6470e-02, PNorm = 67.8828, GNorm = 0.4246, lr_0 = 2.6424e-04
Loss = 4.5175e-02, PNorm = 67.8886, GNorm = 0.8031, lr_0 = 2.6406e-04
Loss = 5.1556e-02, PNorm = 67.8949, GNorm = 0.4588, lr_0 = 2.6388e-04
Loss = 5.0999e-02, PNorm = 67.8989, GNorm = 0.9356, lr_0 = 2.6369e-04
Loss = 5.5080e-02, PNorm = 67.9041, GNorm = 0.5662, lr_0 = 2.6351e-04
Loss = 4.7590e-02, PNorm = 67.9093, GNorm = 0.7756, lr_0 = 2.6333e-04
Loss = 5.3292e-02, PNorm = 67.9133, GNorm = 0.3344, lr_0 = 2.6315e-04
Loss = 5.1088e-02, PNorm = 67.9161, GNorm = 0.4885, lr_0 = 2.6297e-04
Loss = 5.6793e-02, PNorm = 67.9181, GNorm = 0.5173, lr_0 = 2.6279e-04
Loss = 5.2541e-02, PNorm = 67.9231, GNorm = 0.5583, lr_0 = 2.6261e-04
Loss = 4.4152e-02, PNorm = 67.9277, GNorm = 0.5683, lr_0 = 2.6243e-04
Loss = 5.1652e-02, PNorm = 67.9323, GNorm = 0.4261, lr_0 = 2.6225e-04
Loss = 6.7944e-02, PNorm = 67.9374, GNorm = 0.5626, lr_0 = 2.6207e-04
Loss = 5.3366e-02, PNorm = 67.9418, GNorm = 0.6431, lr_0 = 2.6189e-04
Loss = 5.1422e-02, PNorm = 67.9471, GNorm = 0.4368, lr_0 = 2.6171e-04
Loss = 5.0630e-02, PNorm = 67.9529, GNorm = 0.4271, lr_0 = 2.6153e-04
Loss = 5.1994e-02, PNorm = 67.9580, GNorm = 0.4346, lr_0 = 2.6136e-04
Loss = 5.7337e-02, PNorm = 67.9632, GNorm = 0.4371, lr_0 = 2.6118e-04
Loss = 5.2836e-02, PNorm = 67.9682, GNorm = 0.4078, lr_0 = 2.6100e-04
Loss = 4.8614e-02, PNorm = 67.9741, GNorm = 0.5101, lr_0 = 2.6082e-04
Loss = 5.3864e-02, PNorm = 67.9787, GNorm = 0.5011, lr_0 = 2.6064e-04
Loss = 5.4575e-02, PNorm = 67.9829, GNorm = 0.5438, lr_0 = 2.6046e-04
Loss = 5.0278e-02, PNorm = 67.9881, GNorm = 0.8289, lr_0 = 2.6028e-04
Loss = 5.2695e-02, PNorm = 67.9921, GNorm = 0.4026, lr_0 = 2.6011e-04
Loss = 5.7949e-02, PNorm = 67.9980, GNorm = 0.5245, lr_0 = 2.5993e-04
Loss = 5.0900e-02, PNorm = 68.0027, GNorm = 0.4005, lr_0 = 2.5975e-04
Loss = 4.6630e-02, PNorm = 68.0068, GNorm = 0.5726, lr_0 = 2.5957e-04
Loss = 5.1400e-02, PNorm = 68.0121, GNorm = 1.0480, lr_0 = 2.5939e-04
Loss = 4.4068e-02, PNorm = 68.0179, GNorm = 0.5473, lr_0 = 2.5922e-04
Loss = 5.1322e-02, PNorm = 68.0221, GNorm = 0.4902, lr_0 = 2.5904e-04
Loss = 5.8493e-02, PNorm = 68.0255, GNorm = 0.6344, lr_0 = 2.5886e-04
Loss = 5.8746e-02, PNorm = 68.0305, GNorm = 0.5385, lr_0 = 2.5868e-04
Loss = 5.5578e-02, PNorm = 68.0371, GNorm = 1.0916, lr_0 = 2.5851e-04
Loss = 6.3544e-02, PNorm = 68.0435, GNorm = 0.6230, lr_0 = 2.5833e-04
Loss = 6.0742e-02, PNorm = 68.0480, GNorm = 0.5340, lr_0 = 2.5815e-04
Loss = 5.2178e-02, PNorm = 68.0516, GNorm = 0.3964, lr_0 = 2.5797e-04
Loss = 5.7076e-02, PNorm = 68.0538, GNorm = 0.5821, lr_0 = 2.5780e-04
Loss = 5.7180e-02, PNorm = 68.0581, GNorm = 0.5660, lr_0 = 2.5762e-04
Loss = 5.3607e-02, PNorm = 68.0641, GNorm = 0.5284, lr_0 = 2.5745e-04
Loss = 5.0623e-02, PNorm = 68.0685, GNorm = 0.4478, lr_0 = 2.5727e-04
Loss = 5.4378e-02, PNorm = 68.0727, GNorm = 0.3898, lr_0 = 2.5709e-04
Loss = 5.5165e-02, PNorm = 68.0776, GNorm = 0.5028, lr_0 = 2.5692e-04
Loss = 5.6640e-02, PNorm = 68.0854, GNorm = 0.6513, lr_0 = 2.5674e-04
Loss = 5.4099e-02, PNorm = 68.0929, GNorm = 0.4292, lr_0 = 2.5656e-04
Loss = 4.8979e-02, PNorm = 68.0977, GNorm = 0.5412, lr_0 = 2.5639e-04
Loss = 5.0170e-02, PNorm = 68.1023, GNorm = 0.5201, lr_0 = 2.5621e-04
Loss = 5.7452e-02, PNorm = 68.1060, GNorm = 0.4281, lr_0 = 2.5604e-04
Loss = 5.3325e-02, PNorm = 68.1084, GNorm = 0.5218, lr_0 = 2.5586e-04
Loss = 5.2730e-02, PNorm = 68.1129, GNorm = 0.5672, lr_0 = 2.5569e-04
Loss = 5.5246e-02, PNorm = 68.1172, GNorm = 0.5042, lr_0 = 2.5551e-04
Loss = 4.6383e-02, PNorm = 68.1202, GNorm = 0.5755, lr_0 = 2.5534e-04
Loss = 5.5949e-02, PNorm = 68.1243, GNorm = 0.5723, lr_0 = 2.5516e-04
Loss = 5.7685e-02, PNorm = 68.1274, GNorm = 0.5728, lr_0 = 2.5499e-04
Loss = 5.0668e-02, PNorm = 68.1334, GNorm = 0.6081, lr_0 = 2.5481e-04
Loss = 4.8181e-02, PNorm = 68.1390, GNorm = 0.7661, lr_0 = 2.5464e-04
Loss = 5.3740e-02, PNorm = 68.1452, GNorm = 0.5429, lr_0 = 2.5446e-04
Loss = 4.9380e-02, PNorm = 68.1528, GNorm = 0.5394, lr_0 = 2.5429e-04
Loss = 5.5017e-02, PNorm = 68.1586, GNorm = 0.7394, lr_0 = 2.5411e-04
Loss = 4.7185e-02, PNorm = 68.1628, GNorm = 0.3922, lr_0 = 2.5394e-04
Loss = 5.2116e-02, PNorm = 68.1673, GNorm = 0.7865, lr_0 = 2.5377e-04
Loss = 4.8758e-02, PNorm = 68.1698, GNorm = 0.3876, lr_0 = 2.5359e-04
Loss = 5.7669e-02, PNorm = 68.1738, GNorm = 0.5313, lr_0 = 2.5342e-04
Loss = 6.3012e-02, PNorm = 68.1806, GNorm = 1.0254, lr_0 = 2.5325e-04
Loss = 5.5072e-02, PNorm = 68.1842, GNorm = 0.5484, lr_0 = 2.5307e-04
Loss = 5.8855e-02, PNorm = 68.1876, GNorm = 0.9477, lr_0 = 2.5290e-04
Loss = 5.5032e-02, PNorm = 68.1928, GNorm = 0.8655, lr_0 = 2.5273e-04
Loss = 6.1136e-02, PNorm = 68.1989, GNorm = 0.6881, lr_0 = 2.5255e-04
Loss = 5.7210e-02, PNorm = 68.2021, GNorm = 0.5878, lr_0 = 2.5238e-04
Loss = 4.9525e-02, PNorm = 68.2062, GNorm = 0.5182, lr_0 = 2.5221e-04
Loss = 5.3662e-02, PNorm = 68.2115, GNorm = 0.4992, lr_0 = 2.5203e-04
Loss = 4.9244e-02, PNorm = 68.2189, GNorm = 0.5383, lr_0 = 2.5186e-04
Loss = 5.3134e-02, PNorm = 68.2254, GNorm = 0.6140, lr_0 = 2.5169e-04
Loss = 5.1404e-02, PNorm = 68.2287, GNorm = 0.5339, lr_0 = 2.5152e-04
Loss = 6.2454e-02, PNorm = 68.2327, GNorm = 0.5237, lr_0 = 2.5134e-04
Loss = 5.9891e-02, PNorm = 68.2387, GNorm = 0.8416, lr_0 = 2.5117e-04
Loss = 5.4049e-02, PNorm = 68.2409, GNorm = 0.6735, lr_0 = 2.5100e-04
Loss = 5.7624e-02, PNorm = 68.2457, GNorm = 0.6761, lr_0 = 2.5083e-04
Loss = 6.1033e-02, PNorm = 68.2493, GNorm = 0.7682, lr_0 = 2.5066e-04
Loss = 5.3930e-02, PNorm = 68.2537, GNorm = 0.6072, lr_0 = 2.5048e-04
Loss = 5.7633e-02, PNorm = 68.2574, GNorm = 0.6312, lr_0 = 2.5031e-04
Loss = 5.2011e-02, PNorm = 68.2608, GNorm = 0.6830, lr_0 = 2.5014e-04
Loss = 5.1654e-02, PNorm = 68.2640, GNorm = 0.5540, lr_0 = 2.4997e-04
Loss = 5.2046e-02, PNorm = 68.2689, GNorm = 0.4407, lr_0 = 2.4980e-04
Loss = 5.9971e-02, PNorm = 68.2754, GNorm = 0.4363, lr_0 = 2.4963e-04
Loss = 6.3272e-02, PNorm = 68.2808, GNorm = 0.8040, lr_0 = 2.4946e-04
Loss = 5.8746e-02, PNorm = 68.2857, GNorm = 0.7505, lr_0 = 2.4929e-04
Loss = 5.5084e-02, PNorm = 68.2882, GNorm = 0.4633, lr_0 = 2.4911e-04
Loss = 6.0053e-02, PNorm = 68.2940, GNorm = 0.7149, lr_0 = 2.4894e-04
Loss = 5.8989e-02, PNorm = 68.3011, GNorm = 0.5350, lr_0 = 2.4877e-04
Loss = 6.2147e-02, PNorm = 68.3077, GNorm = 0.5329, lr_0 = 2.4860e-04
Loss = 5.3729e-02, PNorm = 68.3123, GNorm = 0.4772, lr_0 = 2.4843e-04
Loss = 5.2374e-02, PNorm = 68.3170, GNorm = 0.4825, lr_0 = 2.4826e-04
Loss = 5.5000e-02, PNorm = 68.3215, GNorm = 0.6729, lr_0 = 2.4809e-04
Loss = 6.1957e-02, PNorm = 68.3250, GNorm = 0.3960, lr_0 = 2.4792e-04
Loss = 5.1960e-02, PNorm = 68.3282, GNorm = 1.1853, lr_0 = 2.4775e-04
Loss = 5.5066e-02, PNorm = 68.3309, GNorm = 0.5171, lr_0 = 2.4758e-04
Loss = 6.0004e-02, PNorm = 68.3350, GNorm = 0.3775, lr_0 = 2.4741e-04
Loss = 5.6320e-02, PNorm = 68.3392, GNorm = 0.5669, lr_0 = 2.4724e-04
Loss = 5.6056e-02, PNorm = 68.3445, GNorm = 0.5173, lr_0 = 2.4707e-04
Validation mae = 0.387910
Epoch 19
Loss = 4.6287e-02, PNorm = 68.3494, GNorm = 0.4568, lr_0 = 2.4690e-04
Loss = 4.5419e-02, PNorm = 68.3536, GNorm = 0.5151, lr_0 = 2.4674e-04
Loss = 4.2702e-02, PNorm = 68.3585, GNorm = 0.7747, lr_0 = 2.4657e-04
Loss = 5.6117e-02, PNorm = 68.3628, GNorm = 0.8805, lr_0 = 2.4640e-04
Loss = 5.8598e-02, PNorm = 68.3686, GNorm = 0.5802, lr_0 = 2.4623e-04
Loss = 5.0238e-02, PNorm = 68.3726, GNorm = 0.5024, lr_0 = 2.4606e-04
Loss = 4.2197e-02, PNorm = 68.3766, GNorm = 0.5982, lr_0 = 2.4589e-04
Loss = 4.2587e-02, PNorm = 68.3808, GNorm = 0.3499, lr_0 = 2.4572e-04
Loss = 5.1477e-02, PNorm = 68.3844, GNorm = 0.5240, lr_0 = 2.4556e-04
Loss = 5.5222e-02, PNorm = 68.3896, GNorm = 0.6151, lr_0 = 2.4539e-04
Loss = 4.6507e-02, PNorm = 68.3964, GNorm = 0.4845, lr_0 = 2.4522e-04
Loss = 4.7591e-02, PNorm = 68.4021, GNorm = 0.5988, lr_0 = 2.4505e-04
Loss = 5.2655e-02, PNorm = 68.4059, GNorm = 0.5862, lr_0 = 2.4488e-04
Loss = 4.1827e-02, PNorm = 68.4090, GNorm = 0.4303, lr_0 = 2.4472e-04
Loss = 4.6565e-02, PNorm = 68.4118, GNorm = 0.7788, lr_0 = 2.4455e-04
Loss = 5.2157e-02, PNorm = 68.4135, GNorm = 0.5574, lr_0 = 2.4438e-04
Loss = 5.1002e-02, PNorm = 68.4188, GNorm = 0.6529, lr_0 = 2.4421e-04
Loss = 5.5435e-02, PNorm = 68.4223, GNorm = 0.5820, lr_0 = 2.4405e-04
Loss = 5.1333e-02, PNorm = 68.4281, GNorm = 0.5814, lr_0 = 2.4388e-04
Loss = 6.0671e-02, PNorm = 68.4367, GNorm = 0.6813, lr_0 = 2.4371e-04
Loss = 5.1261e-02, PNorm = 68.4417, GNorm = 0.6376, lr_0 = 2.4354e-04
Loss = 5.0875e-02, PNorm = 68.4461, GNorm = 0.5966, lr_0 = 2.4338e-04
Loss = 4.5896e-02, PNorm = 68.4502, GNorm = 0.6548, lr_0 = 2.4321e-04
Loss = 4.6330e-02, PNorm = 68.4549, GNorm = 0.4409, lr_0 = 2.4304e-04
Loss = 5.1527e-02, PNorm = 68.4589, GNorm = 0.5373, lr_0 = 2.4288e-04
Loss = 4.1986e-02, PNorm = 68.4618, GNorm = 0.3915, lr_0 = 2.4271e-04
Loss = 4.6895e-02, PNorm = 68.4658, GNorm = 0.6683, lr_0 = 2.4254e-04
Loss = 4.7476e-02, PNorm = 68.4703, GNorm = 0.5624, lr_0 = 2.4238e-04
Loss = 4.4069e-02, PNorm = 68.4738, GNorm = 0.4609, lr_0 = 2.4221e-04
Loss = 4.7259e-02, PNorm = 68.4784, GNorm = 0.4357, lr_0 = 2.4205e-04
Loss = 5.1528e-02, PNorm = 68.4829, GNorm = 0.8384, lr_0 = 2.4188e-04
Loss = 4.2852e-02, PNorm = 68.4854, GNorm = 0.6698, lr_0 = 2.4171e-04
Loss = 4.6319e-02, PNorm = 68.4894, GNorm = 0.5983, lr_0 = 2.4155e-04
Loss = 4.7063e-02, PNorm = 68.4963, GNorm = 0.5560, lr_0 = 2.4138e-04
Loss = 4.7939e-02, PNorm = 68.5014, GNorm = 0.4225, lr_0 = 2.4122e-04
Loss = 5.4397e-02, PNorm = 68.5052, GNorm = 0.4318, lr_0 = 2.4105e-04
Loss = 5.3339e-02, PNorm = 68.5083, GNorm = 0.4585, lr_0 = 2.4089e-04
Loss = 5.1513e-02, PNorm = 68.5127, GNorm = 0.5088, lr_0 = 2.4072e-04
Loss = 5.2881e-02, PNorm = 68.5195, GNorm = 0.6387, lr_0 = 2.4056e-04
Loss = 4.9201e-02, PNorm = 68.5249, GNorm = 0.5227, lr_0 = 2.4039e-04
Loss = 5.3332e-02, PNorm = 68.5289, GNorm = 0.4800, lr_0 = 2.4023e-04
Loss = 4.5425e-02, PNorm = 68.5335, GNorm = 0.5674, lr_0 = 2.4006e-04
Loss = 4.6136e-02, PNorm = 68.5378, GNorm = 0.9515, lr_0 = 2.3990e-04
Loss = 5.2279e-02, PNorm = 68.5404, GNorm = 0.4778, lr_0 = 2.3974e-04
Loss = 4.9240e-02, PNorm = 68.5429, GNorm = 0.8643, lr_0 = 2.3957e-04
Loss = 5.2470e-02, PNorm = 68.5461, GNorm = 0.6477, lr_0 = 2.3941e-04
Loss = 5.0423e-02, PNorm = 68.5487, GNorm = 0.4116, lr_0 = 2.3924e-04
Loss = 4.7697e-02, PNorm = 68.5527, GNorm = 0.4897, lr_0 = 2.3908e-04
Loss = 5.2925e-02, PNorm = 68.5557, GNorm = 0.4499, lr_0 = 2.3892e-04
Loss = 5.5493e-02, PNorm = 68.5579, GNorm = 0.5552, lr_0 = 2.3875e-04
Loss = 4.8205e-02, PNorm = 68.5606, GNorm = 0.7409, lr_0 = 2.3859e-04
Loss = 4.2535e-02, PNorm = 68.5654, GNorm = 0.6569, lr_0 = 2.3842e-04
Loss = 4.7055e-02, PNorm = 68.5708, GNorm = 0.4955, lr_0 = 2.3826e-04
Loss = 4.9255e-02, PNorm = 68.5732, GNorm = 0.3758, lr_0 = 2.3810e-04
Loss = 4.9215e-02, PNorm = 68.5769, GNorm = 0.5479, lr_0 = 2.3794e-04
Loss = 4.5484e-02, PNorm = 68.5810, GNorm = 0.5208, lr_0 = 2.3777e-04
Loss = 4.6510e-02, PNorm = 68.5848, GNorm = 0.5418, lr_0 = 2.3761e-04
Loss = 5.3968e-02, PNorm = 68.5893, GNorm = 0.4371, lr_0 = 2.3745e-04
Loss = 5.7612e-02, PNorm = 68.5943, GNorm = 0.3623, lr_0 = 2.3728e-04
Loss = 5.8032e-02, PNorm = 68.5987, GNorm = 1.0416, lr_0 = 2.3712e-04
Loss = 5.9570e-02, PNorm = 68.6061, GNorm = 0.5989, lr_0 = 2.3696e-04
Loss = 5.8714e-02, PNorm = 68.6139, GNorm = 0.5652, lr_0 = 2.3680e-04
Loss = 5.4880e-02, PNorm = 68.6194, GNorm = 0.5922, lr_0 = 2.3663e-04
Loss = 5.4259e-02, PNorm = 68.6235, GNorm = 0.4372, lr_0 = 2.3647e-04
Loss = 5.6176e-02, PNorm = 68.6268, GNorm = 0.4531, lr_0 = 2.3631e-04
Loss = 5.8832e-02, PNorm = 68.6304, GNorm = 0.6705, lr_0 = 2.3615e-04
Loss = 5.3405e-02, PNorm = 68.6335, GNorm = 0.8508, lr_0 = 2.3599e-04
Loss = 5.6532e-02, PNorm = 68.6371, GNorm = 0.6907, lr_0 = 2.3582e-04
Loss = 5.2041e-02, PNorm = 68.6406, GNorm = 0.5498, lr_0 = 2.3566e-04
Loss = 5.0956e-02, PNorm = 68.6439, GNorm = 0.4739, lr_0 = 2.3550e-04
Loss = 4.8184e-02, PNorm = 68.6454, GNorm = 0.4008, lr_0 = 2.3534e-04
Loss = 5.2945e-02, PNorm = 68.6483, GNorm = 0.6398, lr_0 = 2.3518e-04
Loss = 4.6198e-02, PNorm = 68.6515, GNorm = 0.4593, lr_0 = 2.3502e-04
Loss = 5.1250e-02, PNorm = 68.6554, GNorm = 0.6146, lr_0 = 2.3486e-04
Loss = 5.0939e-02, PNorm = 68.6595, GNorm = 0.5585, lr_0 = 2.3470e-04
Loss = 4.9086e-02, PNorm = 68.6637, GNorm = 0.4689, lr_0 = 2.3454e-04
Loss = 4.7986e-02, PNorm = 68.6677, GNorm = 0.6029, lr_0 = 2.3437e-04
Loss = 5.7850e-02, PNorm = 68.6705, GNorm = 0.5897, lr_0 = 2.3421e-04
Loss = 5.5391e-02, PNorm = 68.6738, GNorm = 0.5831, lr_0 = 2.3405e-04
Loss = 5.1553e-02, PNorm = 68.6768, GNorm = 0.6095, lr_0 = 2.3389e-04
Loss = 4.8365e-02, PNorm = 68.6819, GNorm = 0.4987, lr_0 = 2.3373e-04
Loss = 5.2634e-02, PNorm = 68.6857, GNorm = 0.2976, lr_0 = 2.3357e-04
Loss = 5.1429e-02, PNorm = 68.6906, GNorm = 0.6096, lr_0 = 2.3341e-04
Loss = 5.0447e-02, PNorm = 68.6954, GNorm = 0.5413, lr_0 = 2.3325e-04
Loss = 5.2130e-02, PNorm = 68.7005, GNorm = 0.5176, lr_0 = 2.3309e-04
Loss = 5.5721e-02, PNorm = 68.7036, GNorm = 0.6591, lr_0 = 2.3293e-04
Loss = 4.7764e-02, PNorm = 68.7081, GNorm = 0.6536, lr_0 = 2.3277e-04
Loss = 4.9068e-02, PNorm = 68.7121, GNorm = 0.4698, lr_0 = 2.3261e-04
Loss = 5.3210e-02, PNorm = 68.7166, GNorm = 0.4771, lr_0 = 2.3246e-04
Loss = 5.4826e-02, PNorm = 68.7194, GNorm = 0.5178, lr_0 = 2.3230e-04
Loss = 5.3001e-02, PNorm = 68.7222, GNorm = 0.4584, lr_0 = 2.3214e-04
Loss = 4.5094e-02, PNorm = 68.7243, GNorm = 0.4995, lr_0 = 2.3198e-04
Loss = 5.4431e-02, PNorm = 68.7279, GNorm = 0.4267, lr_0 = 2.3182e-04
Loss = 4.3237e-02, PNorm = 68.7322, GNorm = 0.4796, lr_0 = 2.3166e-04
Loss = 4.8827e-02, PNorm = 68.7358, GNorm = 0.4750, lr_0 = 2.3150e-04
Loss = 4.7307e-02, PNorm = 68.7369, GNorm = 0.9682, lr_0 = 2.3134e-04
Loss = 5.3508e-02, PNorm = 68.7394, GNorm = 0.4921, lr_0 = 2.3118e-04
Loss = 5.3042e-02, PNorm = 68.7435, GNorm = 0.8275, lr_0 = 2.3103e-04
Loss = 5.2472e-02, PNorm = 68.7481, GNorm = 0.7359, lr_0 = 2.3087e-04
Loss = 4.6095e-02, PNorm = 68.7528, GNorm = 0.5066, lr_0 = 2.3071e-04
Loss = 4.8628e-02, PNorm = 68.7559, GNorm = 0.7428, lr_0 = 2.3055e-04
Loss = 4.2308e-02, PNorm = 68.7577, GNorm = 0.4720, lr_0 = 2.3039e-04
Loss = 4.6503e-02, PNorm = 68.7588, GNorm = 0.4505, lr_0 = 2.3024e-04
Loss = 4.6020e-02, PNorm = 68.7614, GNorm = 0.5172, lr_0 = 2.3008e-04
Loss = 4.5279e-02, PNorm = 68.7640, GNorm = 0.7527, lr_0 = 2.2992e-04
Loss = 4.9422e-02, PNorm = 68.7684, GNorm = 0.5980, lr_0 = 2.2976e-04
Loss = 4.7068e-02, PNorm = 68.7724, GNorm = 0.6961, lr_0 = 2.2961e-04
Loss = 5.3946e-02, PNorm = 68.7756, GNorm = 0.5530, lr_0 = 2.2945e-04
Loss = 4.7773e-02, PNorm = 68.7791, GNorm = 0.5921, lr_0 = 2.2929e-04
Loss = 5.1811e-02, PNorm = 68.7828, GNorm = 0.4971, lr_0 = 2.2913e-04
Loss = 5.5201e-02, PNorm = 68.7863, GNorm = 0.5073, lr_0 = 2.2898e-04
Loss = 5.8201e-02, PNorm = 68.7894, GNorm = 0.8286, lr_0 = 2.2882e-04
Loss = 4.7618e-02, PNorm = 68.7927, GNorm = 0.3901, lr_0 = 2.2866e-04
Loss = 4.4943e-02, PNorm = 68.7959, GNorm = 0.5635, lr_0 = 2.2851e-04
Loss = 5.3734e-02, PNorm = 68.7987, GNorm = 0.4770, lr_0 = 2.2835e-04
Loss = 5.5233e-02, PNorm = 68.8026, GNorm = 0.6685, lr_0 = 2.2819e-04
Loss = 5.0566e-02, PNorm = 68.8072, GNorm = 0.4892, lr_0 = 2.2804e-04
Loss = 5.2827e-02, PNorm = 68.8107, GNorm = 0.5172, lr_0 = 2.2788e-04
Loss = 4.9076e-02, PNorm = 68.8145, GNorm = 0.4738, lr_0 = 2.2773e-04
Loss = 5.3888e-02, PNorm = 68.8187, GNorm = 0.5523, lr_0 = 2.2757e-04
Validation mae = 0.387835
Epoch 20
Loss = 4.5695e-02, PNorm = 68.8241, GNorm = 0.4962, lr_0 = 2.2741e-04
Loss = 5.4679e-02, PNorm = 68.8279, GNorm = 0.4848, lr_0 = 2.2726e-04
Loss = 4.4600e-02, PNorm = 68.8304, GNorm = 0.6340, lr_0 = 2.2710e-04
Loss = 4.0055e-02, PNorm = 68.8331, GNorm = 0.5719, lr_0 = 2.2695e-04
Loss = 3.5450e-02, PNorm = 68.8368, GNorm = 0.4003, lr_0 = 2.2679e-04
Loss = 4.4051e-02, PNorm = 68.8412, GNorm = 0.5276, lr_0 = 2.2664e-04
Loss = 4.7911e-02, PNorm = 68.8483, GNorm = 0.6839, lr_0 = 2.2648e-04
Loss = 3.9787e-02, PNorm = 68.8532, GNorm = 0.6531, lr_0 = 2.2632e-04
Loss = 4.6559e-02, PNorm = 68.8585, GNorm = 0.5191, lr_0 = 2.2617e-04
Loss = 4.8172e-02, PNorm = 68.8633, GNorm = 0.6902, lr_0 = 2.2601e-04
Loss = 5.2071e-02, PNorm = 68.8682, GNorm = 0.6505, lr_0 = 2.2586e-04
Loss = 4.5240e-02, PNorm = 68.8727, GNorm = 0.4039, lr_0 = 2.2571e-04
Loss = 4.6685e-02, PNorm = 68.8761, GNorm = 0.3401, lr_0 = 2.2555e-04
Loss = 4.5085e-02, PNorm = 68.8814, GNorm = 0.5894, lr_0 = 2.2540e-04
Loss = 4.4504e-02, PNorm = 68.8869, GNorm = 0.5259, lr_0 = 2.2524e-04
Loss = 4.3691e-02, PNorm = 68.8904, GNorm = 0.4346, lr_0 = 2.2509e-04
Loss = 5.0357e-02, PNorm = 68.8924, GNorm = 0.7008, lr_0 = 2.2493e-04
Loss = 4.4636e-02, PNorm = 68.8980, GNorm = 0.8643, lr_0 = 2.2478e-04
Loss = 4.5093e-02, PNorm = 68.9035, GNorm = 0.4271, lr_0 = 2.2463e-04
Loss = 3.8218e-02, PNorm = 68.9066, GNorm = 0.5717, lr_0 = 2.2447e-04
Loss = 4.4850e-02, PNorm = 68.9102, GNorm = 0.6181, lr_0 = 2.2432e-04
Loss = 3.9700e-02, PNorm = 68.9144, GNorm = 0.6711, lr_0 = 2.2416e-04
Loss = 4.6300e-02, PNorm = 68.9192, GNorm = 0.4137, lr_0 = 2.2401e-04
Loss = 5.5168e-02, PNorm = 68.9241, GNorm = 0.6641, lr_0 = 2.2386e-04
Loss = 5.3599e-02, PNorm = 68.9278, GNorm = 0.3709, lr_0 = 2.2370e-04
Loss = 4.2968e-02, PNorm = 68.9312, GNorm = 0.4619, lr_0 = 2.2355e-04
Loss = 4.5920e-02, PNorm = 68.9340, GNorm = 0.8281, lr_0 = 2.2340e-04
Loss = 5.0879e-02, PNorm = 68.9378, GNorm = 0.7262, lr_0 = 2.2324e-04
Loss = 4.4026e-02, PNorm = 68.9413, GNorm = 0.5431, lr_0 = 2.2309e-04
Loss = 4.7562e-02, PNorm = 68.9446, GNorm = 0.4410, lr_0 = 2.2294e-04
Loss = 4.8624e-02, PNorm = 68.9481, GNorm = 0.8898, lr_0 = 2.2279e-04
Loss = 4.9775e-02, PNorm = 68.9521, GNorm = 0.5373, lr_0 = 2.2263e-04
Loss = 4.9207e-02, PNorm = 68.9558, GNorm = 0.4908, lr_0 = 2.2248e-04
Loss = 3.7404e-02, PNorm = 68.9606, GNorm = 0.4796, lr_0 = 2.2233e-04
Loss = 4.3695e-02, PNorm = 68.9640, GNorm = 0.4127, lr_0 = 2.2218e-04
Loss = 5.0163e-02, PNorm = 68.9662, GNorm = 0.5923, lr_0 = 2.2202e-04
Loss = 4.8439e-02, PNorm = 68.9695, GNorm = 0.4839, lr_0 = 2.2187e-04
Loss = 3.5630e-02, PNorm = 68.9710, GNorm = 0.4784, lr_0 = 2.2172e-04
Loss = 4.6592e-02, PNorm = 68.9735, GNorm = 0.5846, lr_0 = 2.2157e-04
Loss = 4.4684e-02, PNorm = 68.9776, GNorm = 0.4424, lr_0 = 2.2142e-04
Loss = 5.0146e-02, PNorm = 68.9824, GNorm = 0.5784, lr_0 = 2.2126e-04
Loss = 4.1549e-02, PNorm = 68.9875, GNorm = 0.3917, lr_0 = 2.2111e-04
Loss = 3.7943e-02, PNorm = 68.9902, GNorm = 0.4550, lr_0 = 2.2096e-04
Loss = 4.6776e-02, PNorm = 68.9956, GNorm = 0.6851, lr_0 = 2.2081e-04
Loss = 4.5454e-02, PNorm = 69.0003, GNorm = 0.5044, lr_0 = 2.2066e-04
Loss = 4.3131e-02, PNorm = 69.0065, GNorm = 0.5826, lr_0 = 2.2051e-04
Loss = 4.7879e-02, PNorm = 69.0090, GNorm = 0.6271, lr_0 = 2.2036e-04
Loss = 4.3613e-02, PNorm = 69.0129, GNorm = 0.4876, lr_0 = 2.2021e-04
Loss = 5.0114e-02, PNorm = 69.0163, GNorm = 0.4878, lr_0 = 2.2005e-04
Loss = 4.5292e-02, PNorm = 69.0186, GNorm = 0.3931, lr_0 = 2.1990e-04
Loss = 4.7833e-02, PNorm = 69.0213, GNorm = 0.4956, lr_0 = 2.1975e-04
Loss = 5.2943e-02, PNorm = 69.0261, GNorm = 0.3646, lr_0 = 2.1960e-04
Loss = 5.6849e-02, PNorm = 69.0311, GNorm = 0.4841, lr_0 = 2.1945e-04
Loss = 4.8451e-02, PNorm = 69.0347, GNorm = 0.4493, lr_0 = 2.1930e-04
Loss = 4.9651e-02, PNorm = 69.0376, GNorm = 0.5060, lr_0 = 2.1915e-04
Loss = 6.1023e-02, PNorm = 69.0445, GNorm = 0.8623, lr_0 = 2.1900e-04
Loss = 4.6600e-02, PNorm = 69.0500, GNorm = 0.6037, lr_0 = 2.1885e-04
Loss = 4.1642e-02, PNorm = 69.0543, GNorm = 0.5304, lr_0 = 2.1870e-04
Loss = 4.7769e-02, PNorm = 69.0565, GNorm = 0.4465, lr_0 = 2.1855e-04
Loss = 5.0689e-02, PNorm = 69.0579, GNorm = 0.5689, lr_0 = 2.1840e-04
Loss = 4.8750e-02, PNorm = 69.0603, GNorm = 0.4168, lr_0 = 2.1825e-04
Loss = 4.4407e-02, PNorm = 69.0629, GNorm = 0.5332, lr_0 = 2.1810e-04
Loss = 4.4965e-02, PNorm = 69.0647, GNorm = 0.4304, lr_0 = 2.1795e-04
Loss = 4.4416e-02, PNorm = 69.0680, GNorm = 0.4285, lr_0 = 2.1780e-04
Loss = 5.2191e-02, PNorm = 69.0708, GNorm = 0.7654, lr_0 = 2.1765e-04
Loss = 5.0674e-02, PNorm = 69.0727, GNorm = 0.4295, lr_0 = 2.1751e-04
Loss = 4.8023e-02, PNorm = 69.0774, GNorm = 0.5801, lr_0 = 2.1736e-04
Loss = 5.2385e-02, PNorm = 69.0813, GNorm = 0.5364, lr_0 = 2.1721e-04
Loss = 4.6312e-02, PNorm = 69.0842, GNorm = 0.7086, lr_0 = 2.1706e-04
Loss = 4.6679e-02, PNorm = 69.0873, GNorm = 0.5969, lr_0 = 2.1691e-04
Loss = 4.4275e-02, PNorm = 69.0899, GNorm = 0.4498, lr_0 = 2.1676e-04
Loss = 5.0291e-02, PNorm = 69.0945, GNorm = 0.5487, lr_0 = 2.1661e-04
Loss = 4.4827e-02, PNorm = 69.0998, GNorm = 0.3852, lr_0 = 2.1646e-04
Loss = 4.6530e-02, PNorm = 69.1039, GNorm = 0.5934, lr_0 = 2.1632e-04
Loss = 4.3043e-02, PNorm = 69.1065, GNorm = 0.5260, lr_0 = 2.1617e-04
Loss = 4.4699e-02, PNorm = 69.1110, GNorm = 0.3819, lr_0 = 2.1602e-04
Loss = 4.5789e-02, PNorm = 69.1145, GNorm = 0.3926, lr_0 = 2.1587e-04
Loss = 5.6311e-02, PNorm = 69.1175, GNorm = 0.3576, lr_0 = 2.1572e-04
Loss = 5.4949e-02, PNorm = 69.1188, GNorm = 0.6326, lr_0 = 2.1558e-04
Loss = 4.4479e-02, PNorm = 69.1211, GNorm = 0.5171, lr_0 = 2.1543e-04
Loss = 4.4134e-02, PNorm = 69.1247, GNorm = 0.5045, lr_0 = 2.1528e-04
Loss = 4.6471e-02, PNorm = 69.1298, GNorm = 0.4037, lr_0 = 2.1513e-04
Loss = 4.7132e-02, PNorm = 69.1334, GNorm = 0.4177, lr_0 = 2.1499e-04
Loss = 4.9531e-02, PNorm = 69.1375, GNorm = 0.6777, lr_0 = 2.1484e-04
Loss = 5.0762e-02, PNorm = 69.1419, GNorm = 0.4862, lr_0 = 2.1469e-04
Loss = 5.0056e-02, PNorm = 69.1465, GNorm = 0.6814, lr_0 = 2.1454e-04
Loss = 4.3157e-02, PNorm = 69.1502, GNorm = 0.6220, lr_0 = 2.1440e-04
Loss = 4.2556e-02, PNorm = 69.1538, GNorm = 0.4677, lr_0 = 2.1425e-04
Loss = 4.3544e-02, PNorm = 69.1560, GNorm = 0.5140, lr_0 = 2.1410e-04
Loss = 4.6703e-02, PNorm = 69.1593, GNorm = 0.6196, lr_0 = 2.1396e-04
Loss = 5.1234e-02, PNorm = 69.1652, GNorm = 0.6003, lr_0 = 2.1381e-04
Loss = 4.4086e-02, PNorm = 69.1707, GNorm = 0.5351, lr_0 = 2.1366e-04
Loss = 4.5906e-02, PNorm = 69.1735, GNorm = 0.5905, lr_0 = 2.1352e-04
Loss = 4.4848e-02, PNorm = 69.1761, GNorm = 0.5915, lr_0 = 2.1337e-04
Loss = 4.3118e-02, PNorm = 69.1787, GNorm = 0.5475, lr_0 = 2.1323e-04
Loss = 5.2090e-02, PNorm = 69.1808, GNorm = 0.4289, lr_0 = 2.1308e-04
Loss = 5.1244e-02, PNorm = 69.1856, GNorm = 0.5159, lr_0 = 2.1293e-04
Loss = 5.0014e-02, PNorm = 69.1908, GNorm = 0.5906, lr_0 = 2.1279e-04
Loss = 4.3960e-02, PNorm = 69.1957, GNorm = 0.7125, lr_0 = 2.1264e-04
Loss = 5.0364e-02, PNorm = 69.1986, GNorm = 0.4748, lr_0 = 2.1250e-04
Loss = 4.8625e-02, PNorm = 69.2017, GNorm = 0.4681, lr_0 = 2.1235e-04
Loss = 4.4992e-02, PNorm = 69.2045, GNorm = 0.4458, lr_0 = 2.1221e-04
Loss = 4.6905e-02, PNorm = 69.2081, GNorm = 0.5380, lr_0 = 2.1206e-04
Loss = 4.5324e-02, PNorm = 69.2111, GNorm = 0.4915, lr_0 = 2.1191e-04
Loss = 4.3870e-02, PNorm = 69.2124, GNorm = 0.6423, lr_0 = 2.1177e-04
Loss = 5.4019e-02, PNorm = 69.2146, GNorm = 0.6366, lr_0 = 2.1162e-04
Loss = 4.5333e-02, PNorm = 69.2181, GNorm = 0.4235, lr_0 = 2.1148e-04
Loss = 4.8311e-02, PNorm = 69.2197, GNorm = 0.7472, lr_0 = 2.1133e-04
Loss = 5.2007e-02, PNorm = 69.2228, GNorm = 1.0937, lr_0 = 2.1119e-04
Loss = 4.7592e-02, PNorm = 69.2278, GNorm = 0.4666, lr_0 = 2.1104e-04
Loss = 4.5512e-02, PNorm = 69.2313, GNorm = 0.4167, lr_0 = 2.1090e-04
Loss = 4.9090e-02, PNorm = 69.2339, GNorm = 0.7855, lr_0 = 2.1076e-04
Loss = 6.2199e-02, PNorm = 69.2373, GNorm = 0.8002, lr_0 = 2.1061e-04
Loss = 5.1542e-02, PNorm = 69.2437, GNorm = 0.6865, lr_0 = 2.1047e-04
Loss = 4.6473e-02, PNorm = 69.2494, GNorm = 0.4071, lr_0 = 2.1032e-04
Loss = 5.6762e-02, PNorm = 69.2516, GNorm = 0.8368, lr_0 = 2.1018e-04
Loss = 5.1554e-02, PNorm = 69.2549, GNorm = 0.6250, lr_0 = 2.1003e-04
Loss = 5.0866e-02, PNorm = 69.2596, GNorm = 0.4997, lr_0 = 2.0989e-04
Loss = 4.5430e-02, PNorm = 69.2629, GNorm = 0.5352, lr_0 = 2.0975e-04
Loss = 5.4594e-02, PNorm = 69.2663, GNorm = 0.9458, lr_0 = 2.0960e-04
Validation mae = 0.392372
Epoch 21
Loss = 4.8969e-02, PNorm = 69.2704, GNorm = 0.6098, lr_0 = 2.0946e-04
Loss = 4.0528e-02, PNorm = 69.2735, GNorm = 0.4795, lr_0 = 2.0932e-04
Loss = 4.0394e-02, PNorm = 69.2777, GNorm = 0.3890, lr_0 = 2.0917e-04
Loss = 3.9193e-02, PNorm = 69.2825, GNorm = 0.5302, lr_0 = 2.0903e-04
Loss = 4.3722e-02, PNorm = 69.2863, GNorm = 0.4445, lr_0 = 2.0889e-04
Loss = 3.2893e-02, PNorm = 69.2893, GNorm = 0.4339, lr_0 = 2.0874e-04
Loss = 4.0564e-02, PNorm = 69.2935, GNorm = 0.4298, lr_0 = 2.0860e-04
Loss = 3.7650e-02, PNorm = 69.2968, GNorm = 0.3426, lr_0 = 2.0846e-04
Loss = 3.6632e-02, PNorm = 69.2998, GNorm = 0.5800, lr_0 = 2.0831e-04
Loss = 4.3891e-02, PNorm = 69.3043, GNorm = 0.5315, lr_0 = 2.0817e-04
Loss = 4.4764e-02, PNorm = 69.3067, GNorm = 0.4170, lr_0 = 2.0803e-04
Loss = 4.5473e-02, PNorm = 69.3108, GNorm = 0.5740, lr_0 = 2.0789e-04
Loss = 4.3950e-02, PNorm = 69.3156, GNorm = 0.5062, lr_0 = 2.0774e-04
Loss = 4.3503e-02, PNorm = 69.3195, GNorm = 0.4129, lr_0 = 2.0760e-04
Loss = 4.6690e-02, PNorm = 69.3236, GNorm = 0.5123, lr_0 = 2.0746e-04
Loss = 4.1601e-02, PNorm = 69.3266, GNorm = 0.4289, lr_0 = 2.0732e-04
Loss = 4.3572e-02, PNorm = 69.3306, GNorm = 0.6802, lr_0 = 2.0718e-04
Loss = 3.7563e-02, PNorm = 69.3354, GNorm = 0.6267, lr_0 = 2.0703e-04
Loss = 4.4118e-02, PNorm = 69.3392, GNorm = 0.5611, lr_0 = 2.0689e-04
Loss = 4.3386e-02, PNorm = 69.3421, GNorm = 0.5748, lr_0 = 2.0675e-04
Loss = 3.9232e-02, PNorm = 69.3458, GNorm = 0.8705, lr_0 = 2.0661e-04
Loss = 4.3796e-02, PNorm = 69.3487, GNorm = 0.5000, lr_0 = 2.0647e-04
Loss = 5.1964e-02, PNorm = 69.3517, GNorm = 0.4530, lr_0 = 2.0633e-04
Loss = 4.5509e-02, PNorm = 69.3552, GNorm = 0.5247, lr_0 = 2.0618e-04
Loss = 4.2837e-02, PNorm = 69.3588, GNorm = 0.5777, lr_0 = 2.0604e-04
Loss = 4.3879e-02, PNorm = 69.3628, GNorm = 0.5675, lr_0 = 2.0590e-04
Loss = 4.5928e-02, PNorm = 69.3657, GNorm = 1.1221, lr_0 = 2.0576e-04
Loss = 4.9164e-02, PNorm = 69.3705, GNorm = 0.4581, lr_0 = 2.0562e-04
Loss = 4.1521e-02, PNorm = 69.3771, GNorm = 0.5998, lr_0 = 2.0548e-04
Loss = 4.3397e-02, PNorm = 69.3819, GNorm = 0.4613, lr_0 = 2.0534e-04
Loss = 4.7972e-02, PNorm = 69.3855, GNorm = 0.8695, lr_0 = 2.0520e-04
Loss = 4.4056e-02, PNorm = 69.3892, GNorm = 0.5859, lr_0 = 2.0506e-04
Loss = 4.6196e-02, PNorm = 69.3914, GNorm = 0.4266, lr_0 = 2.0492e-04
Loss = 4.8930e-02, PNorm = 69.3943, GNorm = 0.4941, lr_0 = 2.0478e-04
Loss = 4.7049e-02, PNorm = 69.3979, GNorm = 0.4106, lr_0 = 2.0464e-04
Loss = 3.7867e-02, PNorm = 69.4007, GNorm = 0.3782, lr_0 = 2.0450e-04
Loss = 5.1186e-02, PNorm = 69.4032, GNorm = 0.5071, lr_0 = 2.0436e-04
Loss = 4.8973e-02, PNorm = 69.4042, GNorm = 0.5692, lr_0 = 2.0422e-04
Loss = 3.5694e-02, PNorm = 69.4060, GNorm = 0.3905, lr_0 = 2.0408e-04
Loss = 5.7551e-02, PNorm = 69.4090, GNorm = 0.9230, lr_0 = 2.0394e-04
Loss = 4.6094e-02, PNorm = 69.4139, GNorm = 0.6268, lr_0 = 2.0380e-04
Loss = 4.6419e-02, PNorm = 69.4196, GNorm = 0.5306, lr_0 = 2.0366e-04
Loss = 4.0181e-02, PNorm = 69.4243, GNorm = 0.5502, lr_0 = 2.0352e-04
Loss = 3.9401e-02, PNorm = 69.4274, GNorm = 0.5159, lr_0 = 2.0338e-04
Loss = 5.0911e-02, PNorm = 69.4311, GNorm = 0.5329, lr_0 = 2.0324e-04
Loss = 4.1937e-02, PNorm = 69.4342, GNorm = 0.6693, lr_0 = 2.0310e-04
Loss = 4.4638e-02, PNorm = 69.4381, GNorm = 0.3850, lr_0 = 2.0296e-04
Loss = 3.9520e-02, PNorm = 69.4414, GNorm = 0.6563, lr_0 = 2.0282e-04
Loss = 4.6634e-02, PNorm = 69.4459, GNorm = 0.5118, lr_0 = 2.0268e-04
Loss = 3.8890e-02, PNorm = 69.4473, GNorm = 0.4385, lr_0 = 2.0254e-04
Loss = 4.3548e-02, PNorm = 69.4505, GNorm = 0.4261, lr_0 = 2.0240e-04
Loss = 3.3879e-02, PNorm = 69.4529, GNorm = 0.3724, lr_0 = 2.0227e-04
Loss = 4.3878e-02, PNorm = 69.4549, GNorm = 0.4908, lr_0 = 2.0213e-04
Loss = 4.7264e-02, PNorm = 69.4597, GNorm = 0.5168, lr_0 = 2.0199e-04
Loss = 4.1920e-02, PNorm = 69.4632, GNorm = 0.4599, lr_0 = 2.0185e-04
Loss = 4.6418e-02, PNorm = 69.4681, GNorm = 0.5241, lr_0 = 2.0171e-04
Loss = 4.5010e-02, PNorm = 69.4702, GNorm = 0.4107, lr_0 = 2.0157e-04
Loss = 3.4018e-02, PNorm = 69.4729, GNorm = 0.3917, lr_0 = 2.0144e-04
Loss = 4.1524e-02, PNorm = 69.4759, GNorm = 0.5314, lr_0 = 2.0130e-04
Loss = 4.3966e-02, PNorm = 69.4785, GNorm = 0.5647, lr_0 = 2.0116e-04
Loss = 4.3274e-02, PNorm = 69.4806, GNorm = 0.6549, lr_0 = 2.0102e-04
Loss = 4.0100e-02, PNorm = 69.4818, GNorm = 0.7289, lr_0 = 2.0088e-04
Loss = 5.1415e-02, PNorm = 69.4841, GNorm = 0.7492, lr_0 = 2.0075e-04
Loss = 4.2026e-02, PNorm = 69.4865, GNorm = 0.4872, lr_0 = 2.0061e-04
Loss = 4.5668e-02, PNorm = 69.4910, GNorm = 0.6156, lr_0 = 2.0047e-04
Loss = 4.4118e-02, PNorm = 69.4961, GNorm = 0.6191, lr_0 = 2.0033e-04
Loss = 4.4328e-02, PNorm = 69.5000, GNorm = 0.5112, lr_0 = 2.0020e-04
Loss = 4.6116e-02, PNorm = 69.5029, GNorm = 0.3767, lr_0 = 2.0006e-04
Loss = 4.6732e-02, PNorm = 69.5061, GNorm = 0.4664, lr_0 = 1.9992e-04
Loss = 4.4760e-02, PNorm = 69.5090, GNorm = 0.6424, lr_0 = 1.9979e-04
Loss = 4.7225e-02, PNorm = 69.5115, GNorm = 0.8213, lr_0 = 1.9965e-04
Loss = 3.9002e-02, PNorm = 69.5149, GNorm = 0.4623, lr_0 = 1.9951e-04
Loss = 4.0354e-02, PNorm = 69.5189, GNorm = 0.4754, lr_0 = 1.9938e-04
Loss = 4.8573e-02, PNorm = 69.5236, GNorm = 0.4761, lr_0 = 1.9924e-04
Loss = 3.8745e-02, PNorm = 69.5281, GNorm = 0.4587, lr_0 = 1.9910e-04
Loss = 4.8224e-02, PNorm = 69.5317, GNorm = 0.8177, lr_0 = 1.9897e-04
Loss = 4.1329e-02, PNorm = 69.5339, GNorm = 0.5043, lr_0 = 1.9883e-04
Loss = 5.2162e-02, PNorm = 69.5363, GNorm = 0.4288, lr_0 = 1.9869e-04
Loss = 4.0723e-02, PNorm = 69.5399, GNorm = 0.6010, lr_0 = 1.9856e-04
Loss = 4.8589e-02, PNorm = 69.5433, GNorm = 0.4310, lr_0 = 1.9842e-04
Loss = 5.6310e-02, PNorm = 69.5484, GNorm = 0.5334, lr_0 = 1.9829e-04
Loss = 4.4503e-02, PNorm = 69.5518, GNorm = 0.5800, lr_0 = 1.9815e-04
Loss = 4.3707e-02, PNorm = 69.5557, GNorm = 0.4393, lr_0 = 1.9801e-04
Loss = 4.5766e-02, PNorm = 69.5592, GNorm = 0.6901, lr_0 = 1.9788e-04
Loss = 4.9854e-02, PNorm = 69.5625, GNorm = 0.5534, lr_0 = 1.9774e-04
Loss = 5.4289e-02, PNorm = 69.5655, GNorm = 0.6881, lr_0 = 1.9761e-04
Loss = 4.3233e-02, PNorm = 69.5691, GNorm = 0.6775, lr_0 = 1.9747e-04
Loss = 3.9600e-02, PNorm = 69.5721, GNorm = 0.3981, lr_0 = 1.9734e-04
Loss = 4.3888e-02, PNorm = 69.5745, GNorm = 0.8553, lr_0 = 1.9720e-04
Loss = 4.7645e-02, PNorm = 69.5773, GNorm = 0.5521, lr_0 = 1.9707e-04
Loss = 5.0906e-02, PNorm = 69.5808, GNorm = 0.5136, lr_0 = 1.9693e-04
Loss = 5.3910e-02, PNorm = 69.5854, GNorm = 0.7667, lr_0 = 1.9680e-04
Loss = 5.2921e-02, PNorm = 69.5900, GNorm = 0.9446, lr_0 = 1.9666e-04
Loss = 4.2858e-02, PNorm = 69.5935, GNorm = 0.5153, lr_0 = 1.9653e-04
Loss = 5.2226e-02, PNorm = 69.5966, GNorm = 0.4003, lr_0 = 1.9639e-04
Loss = 4.3928e-02, PNorm = 69.6003, GNorm = 0.3903, lr_0 = 1.9626e-04
Loss = 4.0701e-02, PNorm = 69.6024, GNorm = 0.5233, lr_0 = 1.9612e-04
Loss = 4.4272e-02, PNorm = 69.6039, GNorm = 0.4587, lr_0 = 1.9599e-04
Loss = 4.7873e-02, PNorm = 69.6056, GNorm = 0.6191, lr_0 = 1.9585e-04
Loss = 3.8245e-02, PNorm = 69.6083, GNorm = 0.4191, lr_0 = 1.9572e-04
Loss = 4.1735e-02, PNorm = 69.6112, GNorm = 0.4559, lr_0 = 1.9559e-04
Loss = 3.8966e-02, PNorm = 69.6131, GNorm = 0.6058, lr_0 = 1.9545e-04
Loss = 4.0150e-02, PNorm = 69.6152, GNorm = 0.4311, lr_0 = 1.9532e-04
Loss = 5.1006e-02, PNorm = 69.6186, GNorm = 0.4844, lr_0 = 1.9518e-04
Loss = 4.3331e-02, PNorm = 69.6209, GNorm = 0.6596, lr_0 = 1.9505e-04
Loss = 4.7917e-02, PNorm = 69.6227, GNorm = 0.6476, lr_0 = 1.9492e-04
Loss = 4.8445e-02, PNorm = 69.6231, GNorm = 0.6959, lr_0 = 1.9478e-04
Loss = 5.1664e-02, PNorm = 69.6267, GNorm = 0.5694, lr_0 = 1.9465e-04
Loss = 4.4820e-02, PNorm = 69.6333, GNorm = 0.3899, lr_0 = 1.9452e-04
Loss = 4.8997e-02, PNorm = 69.6373, GNorm = 0.6880, lr_0 = 1.9438e-04
Loss = 4.4828e-02, PNorm = 69.6395, GNorm = 0.5667, lr_0 = 1.9425e-04
Loss = 4.7937e-02, PNorm = 69.6406, GNorm = 0.3682, lr_0 = 1.9412e-04
Loss = 5.2364e-02, PNorm = 69.6438, GNorm = 0.5433, lr_0 = 1.9398e-04
Loss = 4.6402e-02, PNorm = 69.6477, GNorm = 0.5809, lr_0 = 1.9385e-04
Loss = 5.4136e-02, PNorm = 69.6514, GNorm = 0.6268, lr_0 = 1.9372e-04
Loss = 4.2423e-02, PNorm = 69.6549, GNorm = 0.6094, lr_0 = 1.9359e-04
Loss = 4.7456e-02, PNorm = 69.6579, GNorm = 0.5655, lr_0 = 1.9345e-04
Loss = 4.3969e-02, PNorm = 69.6614, GNorm = 0.5666, lr_0 = 1.9332e-04
Loss = 4.3916e-02, PNorm = 69.6647, GNorm = 0.6041, lr_0 = 1.9319e-04
Loss = 5.1003e-02, PNorm = 69.6681, GNorm = 0.4782, lr_0 = 1.9306e-04
Validation mae = 0.388129
Epoch 22
Loss = 4.0582e-02, PNorm = 69.6713, GNorm = 0.5301, lr_0 = 1.9292e-04
Loss = 4.0338e-02, PNorm = 69.6742, GNorm = 0.4626, lr_0 = 1.9279e-04
Loss = 3.7448e-02, PNorm = 69.6782, GNorm = 0.5274, lr_0 = 1.9266e-04
Loss = 3.9080e-02, PNorm = 69.6816, GNorm = 0.5499, lr_0 = 1.9253e-04
Loss = 3.6796e-02, PNorm = 69.6851, GNorm = 0.3425, lr_0 = 1.9240e-04
Loss = 3.7451e-02, PNorm = 69.6892, GNorm = 0.3401, lr_0 = 1.9226e-04
Loss = 4.0533e-02, PNorm = 69.6925, GNorm = 0.6365, lr_0 = 1.9213e-04
Loss = 4.1024e-02, PNorm = 69.6954, GNorm = 0.4346, lr_0 = 1.9200e-04
Loss = 3.6501e-02, PNorm = 69.6984, GNorm = 0.4909, lr_0 = 1.9187e-04
Loss = 3.8852e-02, PNorm = 69.7008, GNorm = 0.4480, lr_0 = 1.9174e-04
Loss = 4.3608e-02, PNorm = 69.7036, GNorm = 0.6207, lr_0 = 1.9161e-04
Loss = 4.3436e-02, PNorm = 69.7069, GNorm = 0.4933, lr_0 = 1.9148e-04
Loss = 4.8365e-02, PNorm = 69.7100, GNorm = 0.4436, lr_0 = 1.9134e-04
Loss = 3.7460e-02, PNorm = 69.7118, GNorm = 0.5406, lr_0 = 1.9121e-04
Loss = 3.9668e-02, PNorm = 69.7135, GNorm = 0.4755, lr_0 = 1.9108e-04
Loss = 4.1335e-02, PNorm = 69.7168, GNorm = 0.8670, lr_0 = 1.9095e-04
Loss = 3.7627e-02, PNorm = 69.7214, GNorm = 0.5093, lr_0 = 1.9082e-04
Loss = 3.9425e-02, PNorm = 69.7261, GNorm = 0.5385, lr_0 = 1.9069e-04
Loss = 3.7107e-02, PNorm = 69.7301, GNorm = 0.5005, lr_0 = 1.9056e-04
Loss = 4.8408e-02, PNorm = 69.7338, GNorm = 0.7353, lr_0 = 1.9043e-04
Loss = 4.3680e-02, PNorm = 69.7381, GNorm = 0.4798, lr_0 = 1.9030e-04
Loss = 3.8691e-02, PNorm = 69.7407, GNorm = 0.4222, lr_0 = 1.9017e-04
Loss = 4.0018e-02, PNorm = 69.7440, GNorm = 0.5981, lr_0 = 1.9004e-04
Loss = 4.3372e-02, PNorm = 69.7444, GNorm = 0.6116, lr_0 = 1.8991e-04
Loss = 4.5094e-02, PNorm = 69.7457, GNorm = 0.6287, lr_0 = 1.8978e-04
Loss = 4.3777e-02, PNorm = 69.7476, GNorm = 0.4225, lr_0 = 1.8965e-04
Loss = 4.1714e-02, PNorm = 69.7487, GNorm = 0.5624, lr_0 = 1.8952e-04
Loss = 4.4474e-02, PNorm = 69.7508, GNorm = 0.5769, lr_0 = 1.8939e-04
Loss = 4.4436e-02, PNorm = 69.7548, GNorm = 0.4370, lr_0 = 1.8926e-04
Loss = 4.4794e-02, PNorm = 69.7593, GNorm = 0.7521, lr_0 = 1.8913e-04
Loss = 4.1937e-02, PNorm = 69.7632, GNorm = 0.4078, lr_0 = 1.8900e-04
Loss = 4.0263e-02, PNorm = 69.7685, GNorm = 0.4950, lr_0 = 1.8887e-04
Loss = 4.1047e-02, PNorm = 69.7717, GNorm = 0.3845, lr_0 = 1.8874e-04
Loss = 4.3993e-02, PNorm = 69.7727, GNorm = 0.5305, lr_0 = 1.8861e-04
Loss = 4.0014e-02, PNorm = 69.7750, GNorm = 0.5228, lr_0 = 1.8848e-04
Loss = 4.2748e-02, PNorm = 69.7781, GNorm = 0.6223, lr_0 = 1.8835e-04
Loss = 4.5096e-02, PNorm = 69.7805, GNorm = 0.6652, lr_0 = 1.8822e-04
Loss = 4.2910e-02, PNorm = 69.7820, GNorm = 0.6347, lr_0 = 1.8809e-04
Loss = 3.8376e-02, PNorm = 69.7839, GNorm = 0.4774, lr_0 = 1.8797e-04
Loss = 4.7129e-02, PNorm = 69.7887, GNorm = 0.5066, lr_0 = 1.8784e-04
Loss = 3.8724e-02, PNorm = 69.7941, GNorm = 0.4953, lr_0 = 1.8771e-04
Loss = 4.2369e-02, PNorm = 69.7978, GNorm = 0.6018, lr_0 = 1.8758e-04
Loss = 4.4789e-02, PNorm = 69.8007, GNorm = 0.4728, lr_0 = 1.8745e-04
Loss = 3.6839e-02, PNorm = 69.8033, GNorm = 0.4045, lr_0 = 1.8732e-04
Loss = 4.0071e-02, PNorm = 69.8070, GNorm = 0.4992, lr_0 = 1.8719e-04
Loss = 4.2652e-02, PNorm = 69.8104, GNorm = 0.4661, lr_0 = 1.8707e-04
Loss = 4.1588e-02, PNorm = 69.8132, GNorm = 0.6074, lr_0 = 1.8694e-04
Loss = 4.0008e-02, PNorm = 69.8167, GNorm = 0.4962, lr_0 = 1.8681e-04
Loss = 4.6760e-02, PNorm = 69.8193, GNorm = 0.5975, lr_0 = 1.8668e-04
Loss = 4.3426e-02, PNorm = 69.8231, GNorm = 0.5427, lr_0 = 1.8655e-04
Loss = 4.2979e-02, PNorm = 69.8251, GNorm = 0.4826, lr_0 = 1.8643e-04
Loss = 4.8216e-02, PNorm = 69.8288, GNorm = 0.4195, lr_0 = 1.8630e-04
Loss = 3.7805e-02, PNorm = 69.8325, GNorm = 0.4511, lr_0 = 1.8617e-04
Loss = 3.8742e-02, PNorm = 69.8353, GNorm = 0.3932, lr_0 = 1.8604e-04
Loss = 4.4028e-02, PNorm = 69.8379, GNorm = 0.4172, lr_0 = 1.8592e-04
Loss = 3.7049e-02, PNorm = 69.8399, GNorm = 0.5541, lr_0 = 1.8579e-04
Loss = 3.9862e-02, PNorm = 69.8429, GNorm = 0.5169, lr_0 = 1.8566e-04
Loss = 4.0992e-02, PNorm = 69.8461, GNorm = 0.6113, lr_0 = 1.8553e-04
Loss = 4.1359e-02, PNorm = 69.8501, GNorm = 0.4321, lr_0 = 1.8541e-04
Loss = 4.5484e-02, PNorm = 69.8520, GNorm = 0.4112, lr_0 = 1.8528e-04
Loss = 4.0316e-02, PNorm = 69.8540, GNorm = 0.5402, lr_0 = 1.8515e-04
Loss = 4.3219e-02, PNorm = 69.8566, GNorm = 0.6000, lr_0 = 1.8503e-04
Loss = 4.5564e-02, PNorm = 69.8590, GNorm = 0.5356, lr_0 = 1.8490e-04
Loss = 4.2837e-02, PNorm = 69.8631, GNorm = 0.4485, lr_0 = 1.8477e-04
Loss = 4.4654e-02, PNorm = 69.8652, GNorm = 0.8234, lr_0 = 1.8465e-04
Loss = 4.1816e-02, PNorm = 69.8682, GNorm = 0.5057, lr_0 = 1.8452e-04
Loss = 4.2393e-02, PNorm = 69.8716, GNorm = 0.5915, lr_0 = 1.8439e-04
Loss = 4.6095e-02, PNorm = 69.8744, GNorm = 0.7853, lr_0 = 1.8427e-04
Loss = 4.3849e-02, PNorm = 69.8774, GNorm = 0.5752, lr_0 = 1.8414e-04
Loss = 4.7519e-02, PNorm = 69.8806, GNorm = 0.5690, lr_0 = 1.8401e-04
Loss = 4.1042e-02, PNorm = 69.8841, GNorm = 0.5305, lr_0 = 1.8389e-04
Loss = 4.3424e-02, PNorm = 69.8860, GNorm = 0.4989, lr_0 = 1.8376e-04
Loss = 4.0091e-02, PNorm = 69.8899, GNorm = 0.4182, lr_0 = 1.8364e-04
Loss = 4.1983e-02, PNorm = 69.8928, GNorm = 0.6174, lr_0 = 1.8351e-04
Loss = 4.2364e-02, PNorm = 69.8958, GNorm = 0.4881, lr_0 = 1.8338e-04
Loss = 4.6140e-02, PNorm = 69.8996, GNorm = 0.3796, lr_0 = 1.8326e-04
Loss = 4.1449e-02, PNorm = 69.9013, GNorm = 0.4678, lr_0 = 1.8313e-04
Loss = 4.2767e-02, PNorm = 69.9038, GNorm = 0.6042, lr_0 = 1.8301e-04
Loss = 4.0531e-02, PNorm = 69.9060, GNorm = 0.4261, lr_0 = 1.8288e-04
Loss = 4.1929e-02, PNorm = 69.9084, GNorm = 0.4213, lr_0 = 1.8276e-04
Loss = 4.0886e-02, PNorm = 69.9116, GNorm = 0.4876, lr_0 = 1.8263e-04
Loss = 3.9282e-02, PNorm = 69.9142, GNorm = 0.5205, lr_0 = 1.8251e-04
Loss = 4.4800e-02, PNorm = 69.9161, GNorm = 0.6489, lr_0 = 1.8238e-04
Loss = 4.4448e-02, PNorm = 69.9193, GNorm = 0.4426, lr_0 = 1.8226e-04
Loss = 4.8379e-02, PNorm = 69.9243, GNorm = 0.6183, lr_0 = 1.8213e-04
Loss = 4.5504e-02, PNorm = 69.9283, GNorm = 0.4506, lr_0 = 1.8201e-04
Loss = 4.9438e-02, PNorm = 69.9299, GNorm = 0.5634, lr_0 = 1.8188e-04
Loss = 4.4998e-02, PNorm = 69.9333, GNorm = 0.3932, lr_0 = 1.8176e-04
Loss = 3.9347e-02, PNorm = 69.9365, GNorm = 0.6130, lr_0 = 1.8163e-04
Loss = 3.7818e-02, PNorm = 69.9394, GNorm = 0.4054, lr_0 = 1.8151e-04
Loss = 3.8098e-02, PNorm = 69.9422, GNorm = 0.6200, lr_0 = 1.8138e-04
Loss = 4.3336e-02, PNorm = 69.9449, GNorm = 0.4663, lr_0 = 1.8126e-04
Loss = 4.5107e-02, PNorm = 69.9466, GNorm = 0.6422, lr_0 = 1.8114e-04
Loss = 4.5195e-02, PNorm = 69.9492, GNorm = 0.7294, lr_0 = 1.8101e-04
Loss = 4.9790e-02, PNorm = 69.9510, GNorm = 0.6028, lr_0 = 1.8089e-04
Loss = 4.2978e-02, PNorm = 69.9537, GNorm = 0.5236, lr_0 = 1.8076e-04
Loss = 4.0595e-02, PNorm = 69.9568, GNorm = 0.4526, lr_0 = 1.8064e-04
Loss = 4.0329e-02, PNorm = 69.9596, GNorm = 0.4358, lr_0 = 1.8052e-04
Loss = 3.9709e-02, PNorm = 69.9620, GNorm = 0.5545, lr_0 = 1.8039e-04
Loss = 4.0181e-02, PNorm = 69.9644, GNorm = 0.5034, lr_0 = 1.8027e-04
Loss = 4.7960e-02, PNorm = 69.9667, GNorm = 0.5623, lr_0 = 1.8015e-04
Loss = 4.7810e-02, PNorm = 69.9698, GNorm = 0.6389, lr_0 = 1.8002e-04
Loss = 4.5902e-02, PNorm = 69.9730, GNorm = 0.4588, lr_0 = 1.7990e-04
Loss = 4.3118e-02, PNorm = 69.9772, GNorm = 0.9083, lr_0 = 1.7978e-04
Loss = 4.6960e-02, PNorm = 69.9797, GNorm = 0.5920, lr_0 = 1.7965e-04
Loss = 3.8606e-02, PNorm = 69.9815, GNorm = 0.7402, lr_0 = 1.7953e-04
Loss = 4.1630e-02, PNorm = 69.9834, GNorm = 0.4481, lr_0 = 1.7941e-04
Loss = 3.7871e-02, PNorm = 69.9871, GNorm = 0.4440, lr_0 = 1.7928e-04
Loss = 3.8326e-02, PNorm = 69.9890, GNorm = 0.5845, lr_0 = 1.7916e-04
Loss = 4.1345e-02, PNorm = 69.9919, GNorm = 0.5049, lr_0 = 1.7904e-04
Loss = 4.7515e-02, PNorm = 69.9950, GNorm = 0.7045, lr_0 = 1.7892e-04
Loss = 4.5406e-02, PNorm = 69.9981, GNorm = 0.4085, lr_0 = 1.7879e-04
Loss = 3.9933e-02, PNorm = 69.9998, GNorm = 0.4516, lr_0 = 1.7867e-04
Loss = 4.8045e-02, PNorm = 70.0024, GNorm = 0.6662, lr_0 = 1.7855e-04
Loss = 4.0168e-02, PNorm = 70.0045, GNorm = 0.5223, lr_0 = 1.7843e-04
Loss = 3.6715e-02, PNorm = 70.0055, GNorm = 0.4540, lr_0 = 1.7830e-04
Loss = 4.4492e-02, PNorm = 70.0075, GNorm = 0.5454, lr_0 = 1.7818e-04
Loss = 3.7208e-02, PNorm = 70.0103, GNorm = 0.7091, lr_0 = 1.7806e-04
Loss = 3.8611e-02, PNorm = 70.0127, GNorm = 0.5021, lr_0 = 1.7794e-04
Loss = 4.1940e-02, PNorm = 70.0144, GNorm = 0.4109, lr_0 = 1.7782e-04
Validation mae = 0.390633
Epoch 23
Loss = 3.5436e-02, PNorm = 70.0160, GNorm = 0.5009, lr_0 = 1.7769e-04
Loss = 3.4033e-02, PNorm = 70.0198, GNorm = 0.8572, lr_0 = 1.7757e-04
Loss = 3.8752e-02, PNorm = 70.0245, GNorm = 0.5790, lr_0 = 1.7745e-04
Loss = 3.7793e-02, PNorm = 70.0279, GNorm = 0.3521, lr_0 = 1.7733e-04
Loss = 3.9824e-02, PNorm = 70.0308, GNorm = 0.4688, lr_0 = 1.7721e-04
Loss = 3.8758e-02, PNorm = 70.0331, GNorm = 0.3721, lr_0 = 1.7709e-04
Loss = 3.1810e-02, PNorm = 70.0349, GNorm = 0.5771, lr_0 = 1.7696e-04
Loss = 3.6304e-02, PNorm = 70.0375, GNorm = 0.5912, lr_0 = 1.7684e-04
Loss = 3.6427e-02, PNorm = 70.0404, GNorm = 0.4312, lr_0 = 1.7672e-04
Loss = 3.8767e-02, PNorm = 70.0435, GNorm = 0.5517, lr_0 = 1.7660e-04
Loss = 4.0731e-02, PNorm = 70.0464, GNorm = 0.8621, lr_0 = 1.7648e-04
Loss = 3.7290e-02, PNorm = 70.0495, GNorm = 0.5868, lr_0 = 1.7636e-04
Loss = 4.0235e-02, PNorm = 70.0519, GNorm = 0.8665, lr_0 = 1.7624e-04
Loss = 3.7676e-02, PNorm = 70.0540, GNorm = 0.3813, lr_0 = 1.7612e-04
Loss = 4.1743e-02, PNorm = 70.0581, GNorm = 0.4859, lr_0 = 1.7600e-04
Loss = 3.3363e-02, PNorm = 70.0609, GNorm = 0.3774, lr_0 = 1.7588e-04
Loss = 3.6391e-02, PNorm = 70.0641, GNorm = 0.3761, lr_0 = 1.7576e-04
Loss = 3.0863e-02, PNorm = 70.0657, GNorm = 0.4015, lr_0 = 1.7564e-04
Loss = 3.7567e-02, PNorm = 70.0678, GNorm = 0.4969, lr_0 = 1.7552e-04
Loss = 3.4635e-02, PNorm = 70.0704, GNorm = 0.3651, lr_0 = 1.7540e-04
Loss = 3.7494e-02, PNorm = 70.0731, GNorm = 0.4991, lr_0 = 1.7528e-04
Loss = 3.4921e-02, PNorm = 70.0748, GNorm = 0.4765, lr_0 = 1.7516e-04
Loss = 3.7046e-02, PNorm = 70.0770, GNorm = 0.3538, lr_0 = 1.7504e-04
Loss = 4.0992e-02, PNorm = 70.0800, GNorm = 0.4682, lr_0 = 1.7492e-04
Loss = 4.1606e-02, PNorm = 70.0835, GNorm = 0.4861, lr_0 = 1.7480e-04
Loss = 4.4790e-02, PNorm = 70.0875, GNorm = 0.5627, lr_0 = 1.7468e-04
Loss = 3.3229e-02, PNorm = 70.0899, GNorm = 0.5689, lr_0 = 1.7456e-04
Loss = 3.3448e-02, PNorm = 70.0924, GNorm = 0.5999, lr_0 = 1.7444e-04
Loss = 4.1836e-02, PNorm = 70.0964, GNorm = 0.8090, lr_0 = 1.7432e-04
Loss = 4.7758e-02, PNorm = 70.1003, GNorm = 0.5096, lr_0 = 1.7420e-04
Loss = 3.5677e-02, PNorm = 70.1031, GNorm = 0.6116, lr_0 = 1.7408e-04
Loss = 4.7825e-02, PNorm = 70.1058, GNorm = 0.7247, lr_0 = 1.7396e-04
Loss = 3.5234e-02, PNorm = 70.1075, GNorm = 0.5101, lr_0 = 1.7384e-04
Loss = 3.7395e-02, PNorm = 70.1098, GNorm = 0.4699, lr_0 = 1.7372e-04
Loss = 4.0075e-02, PNorm = 70.1125, GNorm = 0.5115, lr_0 = 1.7360e-04
Loss = 3.3707e-02, PNorm = 70.1149, GNorm = 0.5343, lr_0 = 1.7348e-04
Loss = 3.7504e-02, PNorm = 70.1168, GNorm = 0.4419, lr_0 = 1.7336e-04
Loss = 4.1170e-02, PNorm = 70.1221, GNorm = 0.7687, lr_0 = 1.7325e-04
Loss = 3.3876e-02, PNorm = 70.1272, GNorm = 0.4616, lr_0 = 1.7313e-04
Loss = 3.5096e-02, PNorm = 70.1293, GNorm = 0.4741, lr_0 = 1.7301e-04
Loss = 3.3069e-02, PNorm = 70.1317, GNorm = 0.4307, lr_0 = 1.7289e-04
Loss = 3.6685e-02, PNorm = 70.1337, GNorm = 0.3613, lr_0 = 1.7277e-04
Loss = 3.7464e-02, PNorm = 70.1354, GNorm = 0.4639, lr_0 = 1.7265e-04
Loss = 3.6775e-02, PNorm = 70.1365, GNorm = 0.6104, lr_0 = 1.7253e-04
Loss = 4.1990e-02, PNorm = 70.1382, GNorm = 0.5357, lr_0 = 1.7242e-04
Loss = 3.8515e-02, PNorm = 70.1410, GNorm = 0.4804, lr_0 = 1.7230e-04
Loss = 3.7709e-02, PNorm = 70.1440, GNorm = 0.5609, lr_0 = 1.7218e-04
Loss = 4.1869e-02, PNorm = 70.1461, GNorm = 0.3306, lr_0 = 1.7206e-04
Loss = 3.5493e-02, PNorm = 70.1498, GNorm = 0.6351, lr_0 = 1.7194e-04
Loss = 4.0865e-02, PNorm = 70.1533, GNorm = 0.6786, lr_0 = 1.7183e-04
Loss = 3.7820e-02, PNorm = 70.1545, GNorm = 0.5471, lr_0 = 1.7171e-04
Loss = 3.8643e-02, PNorm = 70.1569, GNorm = 0.5026, lr_0 = 1.7159e-04
Loss = 3.7406e-02, PNorm = 70.1601, GNorm = 0.3994, lr_0 = 1.7147e-04
Loss = 4.3611e-02, PNorm = 70.1623, GNorm = 0.5075, lr_0 = 1.7136e-04
Loss = 4.1689e-02, PNorm = 70.1648, GNorm = 0.4626, lr_0 = 1.7124e-04
Loss = 4.5256e-02, PNorm = 70.1668, GNorm = 0.5573, lr_0 = 1.7112e-04
Loss = 4.0153e-02, PNorm = 70.1692, GNorm = 0.4777, lr_0 = 1.7100e-04
Loss = 4.8291e-02, PNorm = 70.1722, GNorm = 0.5324, lr_0 = 1.7089e-04
Loss = 4.0058e-02, PNorm = 70.1756, GNorm = 0.5296, lr_0 = 1.7077e-04
Loss = 3.7891e-02, PNorm = 70.1777, GNorm = 0.4232, lr_0 = 1.7065e-04
Loss = 4.3327e-02, PNorm = 70.1814, GNorm = 0.5282, lr_0 = 1.7054e-04
Loss = 4.1259e-02, PNorm = 70.1844, GNorm = 0.6136, lr_0 = 1.7042e-04
Loss = 3.6946e-02, PNorm = 70.1871, GNorm = 0.3854, lr_0 = 1.7030e-04
Loss = 4.1786e-02, PNorm = 70.1904, GNorm = 0.5087, lr_0 = 1.7019e-04
Loss = 3.8986e-02, PNorm = 70.1925, GNorm = 0.5152, lr_0 = 1.7007e-04
Loss = 4.2744e-02, PNorm = 70.1949, GNorm = 0.5912, lr_0 = 1.6995e-04
Loss = 4.7223e-02, PNorm = 70.1984, GNorm = 0.4777, lr_0 = 1.6984e-04
Loss = 4.8563e-02, PNorm = 70.2017, GNorm = 0.4254, lr_0 = 1.6972e-04
Loss = 3.9229e-02, PNorm = 70.2050, GNorm = 0.5014, lr_0 = 1.6960e-04
Loss = 4.6703e-02, PNorm = 70.2086, GNorm = 0.6448, lr_0 = 1.6949e-04
Loss = 3.9327e-02, PNorm = 70.2119, GNorm = 0.4721, lr_0 = 1.6937e-04
Loss = 4.1029e-02, PNorm = 70.2137, GNorm = 0.4843, lr_0 = 1.6926e-04
Loss = 3.5799e-02, PNorm = 70.2157, GNorm = 0.4664, lr_0 = 1.6914e-04
Loss = 3.6391e-02, PNorm = 70.2182, GNorm = 0.5211, lr_0 = 1.6902e-04
Loss = 4.2041e-02, PNorm = 70.2201, GNorm = 0.6048, lr_0 = 1.6891e-04
Loss = 3.9437e-02, PNorm = 70.2219, GNorm = 0.3369, lr_0 = 1.6879e-04
Loss = 3.7509e-02, PNorm = 70.2239, GNorm = 0.7423, lr_0 = 1.6868e-04
Loss = 4.2503e-02, PNorm = 70.2262, GNorm = 0.8327, lr_0 = 1.6856e-04
Loss = 4.3525e-02, PNorm = 70.2276, GNorm = 0.4866, lr_0 = 1.6845e-04
Loss = 4.5596e-02, PNorm = 70.2298, GNorm = 0.8881, lr_0 = 1.6833e-04
Loss = 3.1372e-02, PNorm = 70.2326, GNorm = 0.4799, lr_0 = 1.6821e-04
Loss = 4.2704e-02, PNorm = 70.2346, GNorm = 0.5542, lr_0 = 1.6810e-04
Loss = 4.1978e-02, PNorm = 70.2355, GNorm = 0.5430, lr_0 = 1.6798e-04
Loss = 3.7478e-02, PNorm = 70.2377, GNorm = 0.5157, lr_0 = 1.6787e-04
Loss = 4.4061e-02, PNorm = 70.2404, GNorm = 0.4943, lr_0 = 1.6775e-04
Loss = 4.2059e-02, PNorm = 70.2435, GNorm = 0.5827, lr_0 = 1.6764e-04
Loss = 4.4711e-02, PNorm = 70.2464, GNorm = 0.4652, lr_0 = 1.6752e-04
Loss = 4.3186e-02, PNorm = 70.2493, GNorm = 0.7634, lr_0 = 1.6741e-04
Loss = 3.1792e-02, PNorm = 70.2527, GNorm = 0.6055, lr_0 = 1.6729e-04
Loss = 4.7837e-02, PNorm = 70.2556, GNorm = 0.5121, lr_0 = 1.6718e-04
Loss = 4.0400e-02, PNorm = 70.2584, GNorm = 0.3763, lr_0 = 1.6707e-04
Loss = 4.3129e-02, PNorm = 70.2622, GNorm = 0.5395, lr_0 = 1.6695e-04
Loss = 4.4133e-02, PNorm = 70.2653, GNorm = 0.5720, lr_0 = 1.6684e-04
Loss = 4.4053e-02, PNorm = 70.2671, GNorm = 0.4050, lr_0 = 1.6672e-04
Loss = 4.3377e-02, PNorm = 70.2689, GNorm = 0.7744, lr_0 = 1.6661e-04
Loss = 4.3419e-02, PNorm = 70.2707, GNorm = 0.7670, lr_0 = 1.6649e-04
Loss = 3.8771e-02, PNorm = 70.2731, GNorm = 0.5423, lr_0 = 1.6638e-04
Loss = 4.1293e-02, PNorm = 70.2767, GNorm = 0.5092, lr_0 = 1.6627e-04
Loss = 4.0527e-02, PNorm = 70.2784, GNorm = 0.6308, lr_0 = 1.6615e-04
Loss = 4.5349e-02, PNorm = 70.2794, GNorm = 0.5834, lr_0 = 1.6604e-04
Loss = 4.3877e-02, PNorm = 70.2816, GNorm = 0.7423, lr_0 = 1.6592e-04
Loss = 3.9043e-02, PNorm = 70.2853, GNorm = 0.4751, lr_0 = 1.6581e-04
Loss = 4.4651e-02, PNorm = 70.2895, GNorm = 0.5695, lr_0 = 1.6570e-04
Loss = 3.8487e-02, PNorm = 70.2918, GNorm = 0.4472, lr_0 = 1.6558e-04
Loss = 4.5262e-02, PNorm = 70.2954, GNorm = 0.7428, lr_0 = 1.6547e-04
Loss = 4.2296e-02, PNorm = 70.2989, GNorm = 0.4181, lr_0 = 1.6536e-04
Loss = 3.5275e-02, PNorm = 70.3020, GNorm = 0.4094, lr_0 = 1.6524e-04
Loss = 3.7153e-02, PNorm = 70.3041, GNorm = 0.4735, lr_0 = 1.6513e-04
Loss = 3.7927e-02, PNorm = 70.3072, GNorm = 0.4785, lr_0 = 1.6502e-04
Loss = 3.2118e-02, PNorm = 70.3092, GNorm = 0.4259, lr_0 = 1.6490e-04
Loss = 4.0374e-02, PNorm = 70.3108, GNorm = 0.4985, lr_0 = 1.6479e-04
Loss = 4.1246e-02, PNorm = 70.3117, GNorm = 0.5379, lr_0 = 1.6468e-04
Loss = 3.8738e-02, PNorm = 70.3146, GNorm = 0.5249, lr_0 = 1.6457e-04
Loss = 4.1103e-02, PNorm = 70.3177, GNorm = 0.3589, lr_0 = 1.6445e-04
Loss = 3.9717e-02, PNorm = 70.3213, GNorm = 0.4491, lr_0 = 1.6434e-04
Loss = 4.4824e-02, PNorm = 70.3236, GNorm = 0.4269, lr_0 = 1.6423e-04
Loss = 3.8730e-02, PNorm = 70.3238, GNorm = 0.4355, lr_0 = 1.6412e-04
Loss = 3.9382e-02, PNorm = 70.3249, GNorm = 0.4512, lr_0 = 1.6400e-04
Loss = 4.2319e-02, PNorm = 70.3269, GNorm = 0.6688, lr_0 = 1.6389e-04
Loss = 3.9387e-02, PNorm = 70.3291, GNorm = 0.3624, lr_0 = 1.6378e-04
Validation mae = 0.389647
Epoch 24
Loss = 3.1952e-02, PNorm = 70.3313, GNorm = 0.4610, lr_0 = 1.6367e-04
Loss = 3.8543e-02, PNorm = 70.3333, GNorm = 0.5924, lr_0 = 1.6355e-04
Loss = 3.1975e-02, PNorm = 70.3357, GNorm = 0.4246, lr_0 = 1.6344e-04
Loss = 3.3468e-02, PNorm = 70.3392, GNorm = 0.3878, lr_0 = 1.6333e-04
Loss = 3.3807e-02, PNorm = 70.3424, GNorm = 0.5035, lr_0 = 1.6322e-04
Loss = 3.5135e-02, PNorm = 70.3432, GNorm = 0.4650, lr_0 = 1.6311e-04
Loss = 3.7624e-02, PNorm = 70.3454, GNorm = 0.4781, lr_0 = 1.6299e-04
Loss = 2.7377e-02, PNorm = 70.3479, GNorm = 0.4432, lr_0 = 1.6288e-04
Loss = 4.0486e-02, PNorm = 70.3514, GNorm = 0.4181, lr_0 = 1.6277e-04
Loss = 3.7487e-02, PNorm = 70.3548, GNorm = 0.4615, lr_0 = 1.6266e-04
Loss = 3.6910e-02, PNorm = 70.3580, GNorm = 0.5528, lr_0 = 1.6255e-04
Loss = 3.2141e-02, PNorm = 70.3607, GNorm = 0.4368, lr_0 = 1.6244e-04
Loss = 3.5903e-02, PNorm = 70.3623, GNorm = 0.4633, lr_0 = 1.6233e-04
Loss = 3.9335e-02, PNorm = 70.3638, GNorm = 0.5249, lr_0 = 1.6221e-04
Loss = 3.2460e-02, PNorm = 70.3666, GNorm = 0.3651, lr_0 = 1.6210e-04
Loss = 4.2920e-02, PNorm = 70.3686, GNorm = 0.4276, lr_0 = 1.6199e-04
Loss = 3.4451e-02, PNorm = 70.3710, GNorm = 0.5029, lr_0 = 1.6188e-04
Loss = 3.4502e-02, PNorm = 70.3737, GNorm = 0.4353, lr_0 = 1.6177e-04
Loss = 3.5805e-02, PNorm = 70.3756, GNorm = 0.5325, lr_0 = 1.6166e-04
Loss = 3.1248e-02, PNorm = 70.3786, GNorm = 0.3144, lr_0 = 1.6155e-04
Loss = 3.3327e-02, PNorm = 70.3811, GNorm = 0.5047, lr_0 = 1.6144e-04
Loss = 3.8824e-02, PNorm = 70.3836, GNorm = 0.5760, lr_0 = 1.6133e-04
Loss = 3.1167e-02, PNorm = 70.3864, GNorm = 0.5627, lr_0 = 1.6122e-04
Loss = 3.8270e-02, PNorm = 70.3886, GNorm = 0.5363, lr_0 = 1.6111e-04
Loss = 3.4150e-02, PNorm = 70.3907, GNorm = 0.3441, lr_0 = 1.6100e-04
Loss = 3.4496e-02, PNorm = 70.3941, GNorm = 0.4377, lr_0 = 1.6089e-04
Loss = 3.5238e-02, PNorm = 70.3969, GNorm = 0.4284, lr_0 = 1.6078e-04
Loss = 3.4342e-02, PNorm = 70.3999, GNorm = 0.4666, lr_0 = 1.6067e-04
Loss = 3.2839e-02, PNorm = 70.4024, GNorm = 0.5848, lr_0 = 1.6056e-04
Loss = 3.5607e-02, PNorm = 70.4043, GNorm = 0.5028, lr_0 = 1.6045e-04
Loss = 3.8701e-02, PNorm = 70.4061, GNorm = 0.6643, lr_0 = 1.6034e-04
Loss = 3.6772e-02, PNorm = 70.4072, GNorm = 0.4946, lr_0 = 1.6023e-04
Loss = 4.0217e-02, PNorm = 70.4090, GNorm = 0.5552, lr_0 = 1.6012e-04
Loss = 3.5838e-02, PNorm = 70.4113, GNorm = 0.4889, lr_0 = 1.6001e-04
Loss = 4.0647e-02, PNorm = 70.4157, GNorm = 0.4647, lr_0 = 1.5990e-04
Loss = 3.7722e-02, PNorm = 70.4177, GNorm = 0.4708, lr_0 = 1.5979e-04
Loss = 3.4799e-02, PNorm = 70.4195, GNorm = 0.5985, lr_0 = 1.5968e-04
Loss = 3.6726e-02, PNorm = 70.4212, GNorm = 0.4881, lr_0 = 1.5957e-04
Loss = 3.3368e-02, PNorm = 70.4228, GNorm = 0.4090, lr_0 = 1.5946e-04
Loss = 3.3968e-02, PNorm = 70.4244, GNorm = 0.3792, lr_0 = 1.5935e-04
Loss = 3.4513e-02, PNorm = 70.4256, GNorm = 0.4478, lr_0 = 1.5924e-04
Loss = 2.9740e-02, PNorm = 70.4276, GNorm = 0.4824, lr_0 = 1.5913e-04
Loss = 3.7053e-02, PNorm = 70.4304, GNorm = 0.4678, lr_0 = 1.5902e-04
Loss = 3.1892e-02, PNorm = 70.4320, GNorm = 0.4972, lr_0 = 1.5891e-04
Loss = 4.8427e-02, PNorm = 70.4343, GNorm = 0.6049, lr_0 = 1.5880e-04
Loss = 4.0203e-02, PNorm = 70.4377, GNorm = 0.6333, lr_0 = 1.5870e-04
Loss = 4.2877e-02, PNorm = 70.4397, GNorm = 0.6373, lr_0 = 1.5859e-04
Loss = 3.7035e-02, PNorm = 70.4420, GNorm = 0.4476, lr_0 = 1.5848e-04
Loss = 3.6745e-02, PNorm = 70.4442, GNorm = 0.6043, lr_0 = 1.5837e-04
Loss = 4.9328e-02, PNorm = 70.4475, GNorm = 0.4715, lr_0 = 1.5826e-04
Loss = 3.2481e-02, PNorm = 70.4520, GNorm = 0.3345, lr_0 = 1.5815e-04
Loss = 3.2087e-02, PNorm = 70.4551, GNorm = 0.5737, lr_0 = 1.5804e-04
Loss = 3.6486e-02, PNorm = 70.4569, GNorm = 0.4434, lr_0 = 1.5794e-04
Loss = 3.4402e-02, PNorm = 70.4589, GNorm = 0.5271, lr_0 = 1.5783e-04
Loss = 4.1035e-02, PNorm = 70.4616, GNorm = 0.4250, lr_0 = 1.5772e-04
Loss = 3.9688e-02, PNorm = 70.4633, GNorm = 0.4411, lr_0 = 1.5761e-04
Loss = 4.1574e-02, PNorm = 70.4661, GNorm = 0.4912, lr_0 = 1.5750e-04
Loss = 3.7031e-02, PNorm = 70.4692, GNorm = 0.5937, lr_0 = 1.5740e-04
Loss = 4.2241e-02, PNorm = 70.4715, GNorm = 0.5313, lr_0 = 1.5729e-04
Loss = 3.7202e-02, PNorm = 70.4742, GNorm = 0.5459, lr_0 = 1.5718e-04
Loss = 3.9888e-02, PNorm = 70.4781, GNorm = 0.4479, lr_0 = 1.5707e-04
Loss = 3.5337e-02, PNorm = 70.4807, GNorm = 0.7250, lr_0 = 1.5697e-04
Loss = 3.3857e-02, PNorm = 70.4829, GNorm = 0.7733, lr_0 = 1.5686e-04
Loss = 3.4296e-02, PNorm = 70.4849, GNorm = 0.4453, lr_0 = 1.5675e-04
Loss = 4.1447e-02, PNorm = 70.4872, GNorm = 0.3864, lr_0 = 1.5664e-04
Loss = 4.0161e-02, PNorm = 70.4897, GNorm = 0.5293, lr_0 = 1.5654e-04
Loss = 4.4540e-02, PNorm = 70.4912, GNorm = 0.4276, lr_0 = 1.5643e-04
Loss = 3.9488e-02, PNorm = 70.4936, GNorm = 0.4411, lr_0 = 1.5632e-04
Loss = 3.8338e-02, PNorm = 70.4964, GNorm = 0.5308, lr_0 = 1.5621e-04
Loss = 3.8741e-02, PNorm = 70.4988, GNorm = 0.4573, lr_0 = 1.5611e-04
Loss = 3.7815e-02, PNorm = 70.5011, GNorm = 0.3895, lr_0 = 1.5600e-04
Loss = 4.1771e-02, PNorm = 70.5030, GNorm = 0.5273, lr_0 = 1.5589e-04
Loss = 4.3912e-02, PNorm = 70.5047, GNorm = 0.4675, lr_0 = 1.5579e-04
Loss = 3.8106e-02, PNorm = 70.5064, GNorm = 0.6680, lr_0 = 1.5568e-04
Loss = 3.6655e-02, PNorm = 70.5081, GNorm = 0.4134, lr_0 = 1.5557e-04
Loss = 3.7874e-02, PNorm = 70.5097, GNorm = 0.4724, lr_0 = 1.5547e-04
Loss = 3.9455e-02, PNorm = 70.5112, GNorm = 0.5239, lr_0 = 1.5536e-04
Loss = 4.0683e-02, PNorm = 70.5126, GNorm = 0.3999, lr_0 = 1.5525e-04
Loss = 3.2684e-02, PNorm = 70.5141, GNorm = 0.5904, lr_0 = 1.5515e-04
Loss = 4.8190e-02, PNorm = 70.5168, GNorm = 0.6543, lr_0 = 1.5504e-04
Loss = 3.8206e-02, PNorm = 70.5183, GNorm = 0.4975, lr_0 = 1.5493e-04
Loss = 3.5052e-02, PNorm = 70.5195, GNorm = 0.4326, lr_0 = 1.5483e-04
Loss = 4.0748e-02, PNorm = 70.5217, GNorm = 0.5109, lr_0 = 1.5472e-04
Loss = 3.7047e-02, PNorm = 70.5244, GNorm = 0.5207, lr_0 = 1.5462e-04
Loss = 3.5696e-02, PNorm = 70.5270, GNorm = 0.4391, lr_0 = 1.5451e-04
Loss = 4.0911e-02, PNorm = 70.5311, GNorm = 0.4567, lr_0 = 1.5440e-04
Loss = 3.1734e-02, PNorm = 70.5343, GNorm = 0.3491, lr_0 = 1.5430e-04
Loss = 3.8393e-02, PNorm = 70.5359, GNorm = 0.6866, lr_0 = 1.5419e-04
Loss = 4.0034e-02, PNorm = 70.5362, GNorm = 0.6045, lr_0 = 1.5409e-04
Loss = 3.5537e-02, PNorm = 70.5389, GNorm = 0.4960, lr_0 = 1.5398e-04
Loss = 3.2714e-02, PNorm = 70.5406, GNorm = 0.4911, lr_0 = 1.5388e-04
Loss = 3.6057e-02, PNorm = 70.5428, GNorm = 0.4913, lr_0 = 1.5377e-04
Loss = 3.8573e-02, PNorm = 70.5456, GNorm = 0.6684, lr_0 = 1.5367e-04
Loss = 3.9307e-02, PNorm = 70.5473, GNorm = 0.5194, lr_0 = 1.5356e-04
Loss = 3.9634e-02, PNorm = 70.5494, GNorm = 0.6499, lr_0 = 1.5346e-04
Loss = 4.1343e-02, PNorm = 70.5521, GNorm = 0.5952, lr_0 = 1.5335e-04
Loss = 4.3600e-02, PNorm = 70.5539, GNorm = 0.6457, lr_0 = 1.5325e-04
Loss = 3.5238e-02, PNorm = 70.5562, GNorm = 0.3126, lr_0 = 1.5314e-04
Loss = 4.0126e-02, PNorm = 70.5590, GNorm = 0.5947, lr_0 = 1.5304e-04
Loss = 3.4518e-02, PNorm = 70.5612, GNorm = 0.4790, lr_0 = 1.5293e-04
Loss = 4.2425e-02, PNorm = 70.5628, GNorm = 0.4061, lr_0 = 1.5283e-04
Loss = 3.9595e-02, PNorm = 70.5650, GNorm = 0.5030, lr_0 = 1.5272e-04
Loss = 4.0744e-02, PNorm = 70.5682, GNorm = 0.5771, lr_0 = 1.5262e-04
Loss = 4.4251e-02, PNorm = 70.5704, GNorm = 0.4155, lr_0 = 1.5251e-04
Loss = 4.0425e-02, PNorm = 70.5716, GNorm = 0.4181, lr_0 = 1.5241e-04
Loss = 4.1298e-02, PNorm = 70.5741, GNorm = 0.5225, lr_0 = 1.5230e-04
Loss = 4.0110e-02, PNorm = 70.5773, GNorm = 0.4902, lr_0 = 1.5220e-04
Loss = 3.8838e-02, PNorm = 70.5794, GNorm = 0.4964, lr_0 = 1.5209e-04
Loss = 3.7727e-02, PNorm = 70.5818, GNorm = 0.4855, lr_0 = 1.5199e-04
Loss = 3.5490e-02, PNorm = 70.5834, GNorm = 0.4863, lr_0 = 1.5189e-04
Loss = 4.6405e-02, PNorm = 70.5857, GNorm = 0.4358, lr_0 = 1.5178e-04
Loss = 3.8496e-02, PNorm = 70.5884, GNorm = 0.7351, lr_0 = 1.5168e-04
Loss = 3.9289e-02, PNorm = 70.5913, GNorm = 0.4687, lr_0 = 1.5157e-04
Loss = 3.8367e-02, PNorm = 70.5944, GNorm = 0.3625, lr_0 = 1.5147e-04
Loss = 3.7817e-02, PNorm = 70.5967, GNorm = 0.4873, lr_0 = 1.5137e-04
Loss = 4.0946e-02, PNorm = 70.5980, GNorm = 0.6160, lr_0 = 1.5126e-04
Loss = 3.9762e-02, PNorm = 70.5992, GNorm = 0.4929, lr_0 = 1.5116e-04
Loss = 3.8505e-02, PNorm = 70.6002, GNorm = 0.4393, lr_0 = 1.5106e-04
Loss = 3.7126e-02, PNorm = 70.6000, GNorm = 0.4526, lr_0 = 1.5095e-04
Loss = 4.1394e-02, PNorm = 70.6019, GNorm = 0.5833, lr_0 = 1.5085e-04
Validation mae = 0.390504
Epoch 25
Loss = 3.1501e-02, PNorm = 70.6053, GNorm = 0.4138, lr_0 = 1.5075e-04
Loss = 3.1092e-02, PNorm = 70.6091, GNorm = 0.4987, lr_0 = 1.5064e-04
Loss = 2.8927e-02, PNorm = 70.6116, GNorm = 0.4436, lr_0 = 1.5054e-04
Loss = 3.3053e-02, PNorm = 70.6127, GNorm = 0.4207, lr_0 = 1.5044e-04
Loss = 2.8398e-02, PNorm = 70.6141, GNorm = 0.6453, lr_0 = 1.5033e-04
Loss = 3.7657e-02, PNorm = 70.6164, GNorm = 0.4799, lr_0 = 1.5023e-04
Loss = 3.5050e-02, PNorm = 70.6190, GNorm = 0.4510, lr_0 = 1.5013e-04
Loss = 3.5607e-02, PNorm = 70.6206, GNorm = 0.4829, lr_0 = 1.5002e-04
Loss = 3.5259e-02, PNorm = 70.6238, GNorm = 0.6280, lr_0 = 1.4992e-04
Loss = 3.7492e-02, PNorm = 70.6269, GNorm = 0.3710, lr_0 = 1.4982e-04
Loss = 3.3752e-02, PNorm = 70.6295, GNorm = 0.3971, lr_0 = 1.4972e-04
Loss = 2.8218e-02, PNorm = 70.6322, GNorm = 0.3512, lr_0 = 1.4961e-04
Loss = 3.1186e-02, PNorm = 70.6345, GNorm = 0.5700, lr_0 = 1.4951e-04
Loss = 3.9724e-02, PNorm = 70.6350, GNorm = 0.5837, lr_0 = 1.4941e-04
Loss = 3.3613e-02, PNorm = 70.6364, GNorm = 0.4126, lr_0 = 1.4931e-04
Loss = 3.9462e-02, PNorm = 70.6390, GNorm = 0.5822, lr_0 = 1.4920e-04
Loss = 3.7178e-02, PNorm = 70.6422, GNorm = 0.4910, lr_0 = 1.4910e-04
Loss = 3.5901e-02, PNorm = 70.6455, GNorm = 0.6178, lr_0 = 1.4900e-04
Loss = 3.8390e-02, PNorm = 70.6485, GNorm = 0.6797, lr_0 = 1.4890e-04
Loss = 3.5554e-02, PNorm = 70.6507, GNorm = 0.4465, lr_0 = 1.4880e-04
Loss = 3.7971e-02, PNorm = 70.6525, GNorm = 0.4718, lr_0 = 1.4869e-04
Loss = 3.5432e-02, PNorm = 70.6540, GNorm = 0.5704, lr_0 = 1.4859e-04
Loss = 3.1042e-02, PNorm = 70.6563, GNorm = 0.4412, lr_0 = 1.4849e-04
Loss = 3.5553e-02, PNorm = 70.6585, GNorm = 0.4676, lr_0 = 1.4839e-04
Loss = 3.3419e-02, PNorm = 70.6608, GNorm = 0.5074, lr_0 = 1.4829e-04
Loss = 3.5331e-02, PNorm = 70.6627, GNorm = 0.4400, lr_0 = 1.4818e-04
Loss = 3.6027e-02, PNorm = 70.6661, GNorm = 0.4706, lr_0 = 1.4808e-04
Loss = 3.6831e-02, PNorm = 70.6701, GNorm = 0.4466, lr_0 = 1.4798e-04
Loss = 3.4308e-02, PNorm = 70.6717, GNorm = 0.7266, lr_0 = 1.4788e-04
Loss = 3.4328e-02, PNorm = 70.6734, GNorm = 0.5795, lr_0 = 1.4778e-04
Loss = 3.4481e-02, PNorm = 70.6748, GNorm = 0.5313, lr_0 = 1.4768e-04
Loss = 3.6751e-02, PNorm = 70.6769, GNorm = 0.3884, lr_0 = 1.4758e-04
Loss = 3.5867e-02, PNorm = 70.6790, GNorm = 0.6886, lr_0 = 1.4748e-04
Loss = 3.9126e-02, PNorm = 70.6818, GNorm = 0.5341, lr_0 = 1.4737e-04
Loss = 3.5575e-02, PNorm = 70.6828, GNorm = 0.4120, lr_0 = 1.4727e-04
Loss = 3.7775e-02, PNorm = 70.6851, GNorm = 0.5028, lr_0 = 1.4717e-04
Loss = 3.0510e-02, PNorm = 70.6879, GNorm = 0.3967, lr_0 = 1.4707e-04
Loss = 3.5889e-02, PNorm = 70.6898, GNorm = 0.3757, lr_0 = 1.4697e-04
Loss = 3.5441e-02, PNorm = 70.6907, GNorm = 0.4635, lr_0 = 1.4687e-04
Loss = 3.9668e-02, PNorm = 70.6929, GNorm = 0.5024, lr_0 = 1.4677e-04
Loss = 3.8610e-02, PNorm = 70.6961, GNorm = 0.4243, lr_0 = 1.4667e-04
Loss = 3.5031e-02, PNorm = 70.6986, GNorm = 0.4345, lr_0 = 1.4657e-04
Loss = 3.0273e-02, PNorm = 70.7013, GNorm = 0.7189, lr_0 = 1.4647e-04
Loss = 3.3539e-02, PNorm = 70.7030, GNorm = 0.4364, lr_0 = 1.4637e-04
Loss = 3.1839e-02, PNorm = 70.7047, GNorm = 0.4475, lr_0 = 1.4627e-04
Loss = 3.1404e-02, PNorm = 70.7077, GNorm = 0.6592, lr_0 = 1.4617e-04
Loss = 3.6906e-02, PNorm = 70.7102, GNorm = 0.4462, lr_0 = 1.4607e-04
Loss = 2.7684e-02, PNorm = 70.7130, GNorm = 0.3654, lr_0 = 1.4597e-04
Loss = 2.9516e-02, PNorm = 70.7149, GNorm = 0.3935, lr_0 = 1.4587e-04
Loss = 3.7450e-02, PNorm = 70.7168, GNorm = 0.5123, lr_0 = 1.4577e-04
Loss = 3.4124e-02, PNorm = 70.7189, GNorm = 0.3966, lr_0 = 1.4567e-04
Loss = 3.9438e-02, PNorm = 70.7205, GNorm = 0.4837, lr_0 = 1.4557e-04
Loss = 2.9634e-02, PNorm = 70.7220, GNorm = 0.6884, lr_0 = 1.4547e-04
Loss = 2.8991e-02, PNorm = 70.7243, GNorm = 0.4159, lr_0 = 1.4537e-04
Loss = 4.0221e-02, PNorm = 70.7262, GNorm = 0.4292, lr_0 = 1.4527e-04
Loss = 3.2396e-02, PNorm = 70.7275, GNorm = 0.4791, lr_0 = 1.4517e-04
Loss = 3.3452e-02, PNorm = 70.7286, GNorm = 0.5792, lr_0 = 1.4507e-04
Loss = 3.0034e-02, PNorm = 70.7300, GNorm = 0.6018, lr_0 = 1.4497e-04
Loss = 3.6124e-02, PNorm = 70.7314, GNorm = 0.7185, lr_0 = 1.4487e-04
Loss = 3.6797e-02, PNorm = 70.7334, GNorm = 0.5516, lr_0 = 1.4477e-04
Loss = 4.4462e-02, PNorm = 70.7358, GNorm = 0.5454, lr_0 = 1.4467e-04
Loss = 3.4245e-02, PNorm = 70.7378, GNorm = 0.4380, lr_0 = 1.4457e-04
Loss = 4.1452e-02, PNorm = 70.7402, GNorm = 0.7480, lr_0 = 1.4447e-04
Loss = 4.1733e-02, PNorm = 70.7415, GNorm = 0.4522, lr_0 = 1.4438e-04
Loss = 4.0762e-02, PNorm = 70.7440, GNorm = 0.8092, lr_0 = 1.4428e-04
Loss = 4.3343e-02, PNorm = 70.7468, GNorm = 0.8016, lr_0 = 1.4418e-04
Loss = 3.8610e-02, PNorm = 70.7496, GNorm = 0.5182, lr_0 = 1.4408e-04
Loss = 2.9746e-02, PNorm = 70.7504, GNorm = 0.4820, lr_0 = 1.4398e-04
Loss = 3.8996e-02, PNorm = 70.7515, GNorm = 0.5520, lr_0 = 1.4388e-04
Loss = 4.0872e-02, PNorm = 70.7536, GNorm = 0.5553, lr_0 = 1.4378e-04
Loss = 3.6059e-02, PNorm = 70.7568, GNorm = 0.4973, lr_0 = 1.4368e-04
Loss = 3.8291e-02, PNorm = 70.7590, GNorm = 0.5846, lr_0 = 1.4359e-04
Loss = 3.8942e-02, PNorm = 70.7620, GNorm = 0.4221, lr_0 = 1.4349e-04
Loss = 3.8689e-02, PNorm = 70.7643, GNorm = 0.5371, lr_0 = 1.4339e-04
Loss = 4.3799e-02, PNorm = 70.7667, GNorm = 0.4796, lr_0 = 1.4329e-04
Loss = 4.0210e-02, PNorm = 70.7708, GNorm = 0.5358, lr_0 = 1.4319e-04
Loss = 3.3619e-02, PNorm = 70.7743, GNorm = 0.4576, lr_0 = 1.4310e-04
Loss = 3.4274e-02, PNorm = 70.7756, GNorm = 0.6798, lr_0 = 1.4300e-04
Loss = 3.6393e-02, PNorm = 70.7766, GNorm = 0.5257, lr_0 = 1.4290e-04
Loss = 3.4873e-02, PNorm = 70.7782, GNorm = 0.6620, lr_0 = 1.4280e-04
Loss = 3.7396e-02, PNorm = 70.7811, GNorm = 0.5591, lr_0 = 1.4270e-04
Loss = 3.8132e-02, PNorm = 70.7843, GNorm = 0.4659, lr_0 = 1.4261e-04
Loss = 3.2240e-02, PNorm = 70.7862, GNorm = 0.5017, lr_0 = 1.4251e-04
Loss = 3.3428e-02, PNorm = 70.7879, GNorm = 0.4503, lr_0 = 1.4241e-04
Loss = 3.2922e-02, PNorm = 70.7896, GNorm = 0.6554, lr_0 = 1.4231e-04
Loss = 4.1584e-02, PNorm = 70.7912, GNorm = 0.8748, lr_0 = 1.4222e-04
Loss = 3.8645e-02, PNorm = 70.7930, GNorm = 0.6998, lr_0 = 1.4212e-04
Loss = 3.7633e-02, PNorm = 70.7953, GNorm = 0.5106, lr_0 = 1.4202e-04
Loss = 3.5427e-02, PNorm = 70.7984, GNorm = 0.4797, lr_0 = 1.4192e-04
Loss = 4.4076e-02, PNorm = 70.8011, GNorm = 0.4916, lr_0 = 1.4183e-04
Loss = 3.7269e-02, PNorm = 70.8037, GNorm = 0.7707, lr_0 = 1.4173e-04
Loss = 3.3281e-02, PNorm = 70.8053, GNorm = 0.4042, lr_0 = 1.4163e-04
Loss = 3.6965e-02, PNorm = 70.8065, GNorm = 0.5380, lr_0 = 1.4153e-04
Loss = 3.5629e-02, PNorm = 70.8089, GNorm = 0.3987, lr_0 = 1.4144e-04
Loss = 3.1588e-02, PNorm = 70.8108, GNorm = 0.5139, lr_0 = 1.4134e-04
Loss = 3.3913e-02, PNorm = 70.8124, GNorm = 0.5509, lr_0 = 1.4124e-04
Loss = 3.0368e-02, PNorm = 70.8146, GNorm = 0.4510, lr_0 = 1.4115e-04
Loss = 3.8972e-02, PNorm = 70.8160, GNorm = 0.3946, lr_0 = 1.4105e-04
Loss = 3.6550e-02, PNorm = 70.8180, GNorm = 0.5115, lr_0 = 1.4095e-04
Loss = 3.8480e-02, PNorm = 70.8199, GNorm = 0.4865, lr_0 = 1.4086e-04
Loss = 4.0469e-02, PNorm = 70.8220, GNorm = 0.6037, lr_0 = 1.4076e-04
Loss = 3.1125e-02, PNorm = 70.8241, GNorm = 0.5645, lr_0 = 1.4066e-04
Loss = 4.3562e-02, PNorm = 70.8266, GNorm = 0.4747, lr_0 = 1.4057e-04
Loss = 4.0764e-02, PNorm = 70.8291, GNorm = 0.7826, lr_0 = 1.4047e-04
Loss = 4.0109e-02, PNorm = 70.8307, GNorm = 0.5618, lr_0 = 1.4038e-04
Loss = 3.7095e-02, PNorm = 70.8331, GNorm = 0.5740, lr_0 = 1.4028e-04
Loss = 3.7070e-02, PNorm = 70.8350, GNorm = 0.4757, lr_0 = 1.4018e-04
Loss = 3.4110e-02, PNorm = 70.8380, GNorm = 0.4928, lr_0 = 1.4009e-04
Loss = 4.1437e-02, PNorm = 70.8414, GNorm = 0.6376, lr_0 = 1.3999e-04
Loss = 3.7715e-02, PNorm = 70.8438, GNorm = 0.3072, lr_0 = 1.3990e-04
Loss = 4.0785e-02, PNorm = 70.8451, GNorm = 0.3649, lr_0 = 1.3980e-04
Loss = 3.3703e-02, PNorm = 70.8474, GNorm = 0.3563, lr_0 = 1.3970e-04
Loss = 3.8609e-02, PNorm = 70.8493, GNorm = 0.6315, lr_0 = 1.3961e-04
Loss = 3.8345e-02, PNorm = 70.8489, GNorm = 0.6535, lr_0 = 1.3951e-04
Loss = 3.6720e-02, PNorm = 70.8500, GNorm = 0.9611, lr_0 = 1.3942e-04
Loss = 4.0012e-02, PNorm = 70.8525, GNorm = 0.3035, lr_0 = 1.3932e-04
Loss = 3.9047e-02, PNorm = 70.8552, GNorm = 0.5497, lr_0 = 1.3923e-04
Loss = 3.3473e-02, PNorm = 70.8569, GNorm = 0.5298, lr_0 = 1.3913e-04
Loss = 3.4726e-02, PNorm = 70.8585, GNorm = 0.4246, lr_0 = 1.3904e-04
Loss = 3.8344e-02, PNorm = 70.8610, GNorm = 0.4773, lr_0 = 1.3894e-04
Validation mae = 0.387707
Epoch 26
Loss = 2.7831e-02, PNorm = 70.8633, GNorm = 0.4186, lr_0 = 1.3884e-04
Loss = 3.8278e-02, PNorm = 70.8650, GNorm = 0.4148, lr_0 = 1.3875e-04
Loss = 3.2658e-02, PNorm = 70.8681, GNorm = 0.6373, lr_0 = 1.3865e-04
Loss = 3.4191e-02, PNorm = 70.8717, GNorm = 0.4928, lr_0 = 1.3856e-04
Loss = 3.1299e-02, PNorm = 70.8746, GNorm = 0.3933, lr_0 = 1.3846e-04
Loss = 3.7901e-02, PNorm = 70.8771, GNorm = 0.4323, lr_0 = 1.3837e-04
Loss = 2.7610e-02, PNorm = 70.8789, GNorm = 0.5844, lr_0 = 1.3828e-04
Loss = 3.4117e-02, PNorm = 70.8803, GNorm = 0.4731, lr_0 = 1.3818e-04
Loss = 2.8536e-02, PNorm = 70.8821, GNorm = 0.4825, lr_0 = 1.3809e-04
Loss = 3.0951e-02, PNorm = 70.8840, GNorm = 0.6150, lr_0 = 1.3799e-04
Loss = 3.7152e-02, PNorm = 70.8860, GNorm = 0.4654, lr_0 = 1.3790e-04
Loss = 3.2023e-02, PNorm = 70.8889, GNorm = 0.4199, lr_0 = 1.3780e-04
Loss = 3.2584e-02, PNorm = 70.8908, GNorm = 0.5066, lr_0 = 1.3771e-04
Loss = 3.1288e-02, PNorm = 70.8921, GNorm = 0.3693, lr_0 = 1.3761e-04
Loss = 3.9126e-02, PNorm = 70.8948, GNorm = 0.4800, lr_0 = 1.3752e-04
Loss = 3.1766e-02, PNorm = 70.8975, GNorm = 0.4981, lr_0 = 1.3742e-04
Loss = 3.2825e-02, PNorm = 70.9000, GNorm = 0.4647, lr_0 = 1.3733e-04
Loss = 2.9752e-02, PNorm = 70.9016, GNorm = 0.3756, lr_0 = 1.3724e-04
Loss = 3.1445e-02, PNorm = 70.9033, GNorm = 0.3536, lr_0 = 1.3714e-04
Loss = 3.0808e-02, PNorm = 70.9053, GNorm = 0.5392, lr_0 = 1.3705e-04
Loss = 3.3087e-02, PNorm = 70.9074, GNorm = 0.7267, lr_0 = 1.3695e-04
Loss = 4.0224e-02, PNorm = 70.9102, GNorm = 0.4685, lr_0 = 1.3686e-04
Loss = 3.6533e-02, PNorm = 70.9129, GNorm = 0.5947, lr_0 = 1.3677e-04
Loss = 3.6087e-02, PNorm = 70.9148, GNorm = 0.3610, lr_0 = 1.3667e-04
Loss = 3.7109e-02, PNorm = 70.9177, GNorm = 1.0842, lr_0 = 1.3658e-04
Loss = 3.4098e-02, PNorm = 70.9215, GNorm = 0.3933, lr_0 = 1.3649e-04
Loss = 3.5107e-02, PNorm = 70.9237, GNorm = 0.4739, lr_0 = 1.3639e-04
Loss = 3.5583e-02, PNorm = 70.9253, GNorm = 0.6131, lr_0 = 1.3630e-04
Loss = 3.0137e-02, PNorm = 70.9268, GNorm = 0.5369, lr_0 = 1.3621e-04
Loss = 3.4588e-02, PNorm = 70.9274, GNorm = 0.4642, lr_0 = 1.3611e-04
Loss = 3.3465e-02, PNorm = 70.9270, GNorm = 0.5744, lr_0 = 1.3602e-04
Loss = 3.8374e-02, PNorm = 70.9281, GNorm = 0.4970, lr_0 = 1.3593e-04
Loss = 3.2770e-02, PNorm = 70.9299, GNorm = 0.5143, lr_0 = 1.3583e-04
Loss = 3.9454e-02, PNorm = 70.9315, GNorm = 0.5688, lr_0 = 1.3574e-04
Loss = 3.2604e-02, PNorm = 70.9337, GNorm = 0.4627, lr_0 = 1.3565e-04
Loss = 3.5257e-02, PNorm = 70.9351, GNorm = 0.5803, lr_0 = 1.3555e-04
Loss = 3.3299e-02, PNorm = 70.9377, GNorm = 0.4187, lr_0 = 1.3546e-04
Loss = 3.2324e-02, PNorm = 70.9397, GNorm = 0.4648, lr_0 = 1.3537e-04
Loss = 2.8288e-02, PNorm = 70.9419, GNorm = 0.4029, lr_0 = 1.3528e-04
Loss = 2.6787e-02, PNorm = 70.9432, GNorm = 0.4509, lr_0 = 1.3518e-04
Loss = 3.2842e-02, PNorm = 70.9455, GNorm = 0.3777, lr_0 = 1.3509e-04
Loss = 3.3682e-02, PNorm = 70.9470, GNorm = 0.6065, lr_0 = 1.3500e-04
Loss = 3.2982e-02, PNorm = 70.9494, GNorm = 0.4041, lr_0 = 1.3491e-04
Loss = 3.2612e-02, PNorm = 70.9514, GNorm = 0.5195, lr_0 = 1.3481e-04
Loss = 3.6779e-02, PNorm = 70.9537, GNorm = 0.3299, lr_0 = 1.3472e-04
Loss = 3.6220e-02, PNorm = 70.9558, GNorm = 0.5271, lr_0 = 1.3463e-04
Loss = 3.1087e-02, PNorm = 70.9577, GNorm = 0.3412, lr_0 = 1.3454e-04
Loss = 4.2282e-02, PNorm = 70.9594, GNorm = 0.4958, lr_0 = 1.3444e-04
Loss = 3.5785e-02, PNorm = 70.9628, GNorm = 0.6628, lr_0 = 1.3435e-04
Loss = 2.9585e-02, PNorm = 70.9652, GNorm = 0.4336, lr_0 = 1.3426e-04
Loss = 3.6179e-02, PNorm = 70.9675, GNorm = 0.5434, lr_0 = 1.3417e-04
Loss = 3.2788e-02, PNorm = 70.9692, GNorm = 0.4395, lr_0 = 1.3408e-04
Loss = 3.4023e-02, PNorm = 70.9705, GNorm = 0.5320, lr_0 = 1.3398e-04
Loss = 2.8026e-02, PNorm = 70.9710, GNorm = 0.3902, lr_0 = 1.3389e-04
Loss = 3.7013e-02, PNorm = 70.9727, GNorm = 0.4396, lr_0 = 1.3380e-04
Loss = 3.2387e-02, PNorm = 70.9740, GNorm = 0.3675, lr_0 = 1.3371e-04
Loss = 2.9926e-02, PNorm = 70.9754, GNorm = 0.4860, lr_0 = 1.3362e-04
Loss = 3.2000e-02, PNorm = 70.9767, GNorm = 0.4014, lr_0 = 1.3353e-04
Loss = 3.4634e-02, PNorm = 70.9794, GNorm = 0.5308, lr_0 = 1.3343e-04
Loss = 3.4958e-02, PNorm = 70.9817, GNorm = 0.3998, lr_0 = 1.3334e-04
Loss = 3.6473e-02, PNorm = 70.9836, GNorm = 0.5700, lr_0 = 1.3325e-04
Loss = 3.3105e-02, PNorm = 70.9843, GNorm = 0.4079, lr_0 = 1.3316e-04
Loss = 3.3272e-02, PNorm = 70.9851, GNorm = 0.6521, lr_0 = 1.3307e-04
Loss = 3.1927e-02, PNorm = 70.9867, GNorm = 0.4304, lr_0 = 1.3298e-04
Loss = 3.7655e-02, PNorm = 70.9887, GNorm = 0.4547, lr_0 = 1.3289e-04
Loss = 3.8466e-02, PNorm = 70.9912, GNorm = 0.4555, lr_0 = 1.3280e-04
Loss = 3.0949e-02, PNorm = 70.9939, GNorm = 0.4300, lr_0 = 1.3270e-04
Loss = 3.2886e-02, PNorm = 70.9961, GNorm = 0.4551, lr_0 = 1.3261e-04
Loss = 3.5030e-02, PNorm = 70.9978, GNorm = 0.4455, lr_0 = 1.3252e-04
Loss = 3.5897e-02, PNorm = 71.0001, GNorm = 0.3388, lr_0 = 1.3243e-04
Loss = 3.7138e-02, PNorm = 71.0028, GNorm = 0.5855, lr_0 = 1.3234e-04
Loss = 3.3913e-02, PNorm = 71.0042, GNorm = 0.6488, lr_0 = 1.3225e-04
Loss = 3.6302e-02, PNorm = 71.0056, GNorm = 0.5512, lr_0 = 1.3216e-04
Loss = 3.1538e-02, PNorm = 71.0078, GNorm = 0.4589, lr_0 = 1.3207e-04
Loss = 3.2988e-02, PNorm = 71.0095, GNorm = 0.5311, lr_0 = 1.3198e-04
Loss = 3.5499e-02, PNorm = 71.0114, GNorm = 0.5939, lr_0 = 1.3189e-04
Loss = 3.5184e-02, PNorm = 71.0131, GNorm = 0.5512, lr_0 = 1.3180e-04
Loss = 3.2281e-02, PNorm = 71.0148, GNorm = 0.4970, lr_0 = 1.3171e-04
Loss = 3.9112e-02, PNorm = 71.0167, GNorm = 0.4725, lr_0 = 1.3162e-04
Loss = 3.1447e-02, PNorm = 71.0185, GNorm = 0.5872, lr_0 = 1.3153e-04
Loss = 3.1839e-02, PNorm = 71.0192, GNorm = 0.4481, lr_0 = 1.3144e-04
Loss = 3.2697e-02, PNorm = 71.0208, GNorm = 0.3265, lr_0 = 1.3135e-04
Loss = 3.4186e-02, PNorm = 71.0216, GNorm = 0.4923, lr_0 = 1.3126e-04
Loss = 2.8499e-02, PNorm = 71.0228, GNorm = 0.3701, lr_0 = 1.3117e-04
Loss = 3.2301e-02, PNorm = 71.0242, GNorm = 0.4287, lr_0 = 1.3108e-04
Loss = 2.8600e-02, PNorm = 71.0255, GNorm = 0.5761, lr_0 = 1.3099e-04
Loss = 3.5931e-02, PNorm = 71.0271, GNorm = 0.3827, lr_0 = 1.3090e-04
Loss = 3.5650e-02, PNorm = 71.0290, GNorm = 0.4353, lr_0 = 1.3081e-04
Loss = 3.6112e-02, PNorm = 71.0315, GNorm = 0.4356, lr_0 = 1.3072e-04
Loss = 3.5244e-02, PNorm = 71.0339, GNorm = 0.3646, lr_0 = 1.3063e-04
Loss = 2.9836e-02, PNorm = 71.0351, GNorm = 0.4426, lr_0 = 1.3054e-04
Loss = 3.4925e-02, PNorm = 71.0364, GNorm = 0.5454, lr_0 = 1.3045e-04
Loss = 3.2548e-02, PNorm = 71.0384, GNorm = 0.7789, lr_0 = 1.3036e-04
Loss = 4.1452e-02, PNorm = 71.0417, GNorm = 0.4926, lr_0 = 1.3027e-04
Loss = 3.5315e-02, PNorm = 71.0448, GNorm = 0.4011, lr_0 = 1.3018e-04
Loss = 3.2934e-02, PNorm = 71.0468, GNorm = 0.4410, lr_0 = 1.3009e-04
Loss = 3.8410e-02, PNorm = 71.0485, GNorm = 0.6030, lr_0 = 1.3000e-04
Loss = 2.6962e-02, PNorm = 71.0505, GNorm = 0.3688, lr_0 = 1.2992e-04
Loss = 3.2807e-02, PNorm = 71.0515, GNorm = 0.5962, lr_0 = 1.2983e-04
Loss = 3.3362e-02, PNorm = 71.0525, GNorm = 0.5723, lr_0 = 1.2974e-04
Loss = 3.2439e-02, PNorm = 71.0535, GNorm = 0.3866, lr_0 = 1.2965e-04
Loss = 3.7132e-02, PNorm = 71.0544, GNorm = 0.5593, lr_0 = 1.2956e-04
Loss = 2.9852e-02, PNorm = 71.0564, GNorm = 0.4002, lr_0 = 1.2947e-04
Loss = 3.1845e-02, PNorm = 71.0580, GNorm = 0.3913, lr_0 = 1.2938e-04
Loss = 3.9881e-02, PNorm = 71.0594, GNorm = 0.6718, lr_0 = 1.2929e-04
Loss = 3.4548e-02, PNorm = 71.0600, GNorm = 0.4916, lr_0 = 1.2921e-04
Loss = 3.3299e-02, PNorm = 71.0606, GNorm = 0.4104, lr_0 = 1.2912e-04
Loss = 3.7759e-02, PNorm = 71.0626, GNorm = 0.3781, lr_0 = 1.2903e-04
Loss = 3.8731e-02, PNorm = 71.0653, GNorm = 0.3175, lr_0 = 1.2894e-04
Loss = 3.9353e-02, PNorm = 71.0686, GNorm = 0.5650, lr_0 = 1.2885e-04
Loss = 3.8492e-02, PNorm = 71.0706, GNorm = 0.4394, lr_0 = 1.2876e-04
Loss = 3.2043e-02, PNorm = 71.0724, GNorm = 0.4044, lr_0 = 1.2867e-04
Loss = 4.0725e-02, PNorm = 71.0743, GNorm = 0.6396, lr_0 = 1.2859e-04
Loss = 3.8051e-02, PNorm = 71.0747, GNorm = 0.4214, lr_0 = 1.2850e-04
Loss = 3.2798e-02, PNorm = 71.0764, GNorm = 0.4276, lr_0 = 1.2841e-04
Loss = 3.9799e-02, PNorm = 71.0790, GNorm = 0.4186, lr_0 = 1.2832e-04
Loss = 3.3225e-02, PNorm = 71.0810, GNorm = 0.4305, lr_0 = 1.2823e-04
Loss = 3.6268e-02, PNorm = 71.0824, GNorm = 0.5010, lr_0 = 1.2815e-04
Loss = 3.7922e-02, PNorm = 71.0840, GNorm = 0.6129, lr_0 = 1.2806e-04
Loss = 3.4862e-02, PNorm = 71.0854, GNorm = 0.3650, lr_0 = 1.2797e-04
Validation mae = 0.388072
Epoch 27
Loss = 2.8473e-02, PNorm = 71.0865, GNorm = 0.4497, lr_0 = 1.2788e-04
Loss = 3.0134e-02, PNorm = 71.0878, GNorm = 0.4644, lr_0 = 1.2780e-04
Loss = 2.9299e-02, PNorm = 71.0894, GNorm = 0.5897, lr_0 = 1.2771e-04
Loss = 3.6225e-02, PNorm = 71.0904, GNorm = 0.3826, lr_0 = 1.2762e-04
Loss = 3.1220e-02, PNorm = 71.0919, GNorm = 0.3468, lr_0 = 1.2753e-04
Loss = 2.9636e-02, PNorm = 71.0946, GNorm = 0.6239, lr_0 = 1.2745e-04
Loss = 2.9981e-02, PNorm = 71.0968, GNorm = 0.3432, lr_0 = 1.2736e-04
Loss = 4.2558e-02, PNorm = 71.0989, GNorm = 0.5261, lr_0 = 1.2727e-04
Loss = 3.6809e-02, PNorm = 71.1011, GNorm = 0.5356, lr_0 = 1.2718e-04
Loss = 3.2491e-02, PNorm = 71.1031, GNorm = 0.4273, lr_0 = 1.2710e-04
Loss = 2.5563e-02, PNorm = 71.1051, GNorm = 0.4739, lr_0 = 1.2701e-04
Loss = 3.0912e-02, PNorm = 71.1058, GNorm = 0.4337, lr_0 = 1.2692e-04
Loss = 2.9676e-02, PNorm = 71.1061, GNorm = 0.4924, lr_0 = 1.2684e-04
Loss = 3.2582e-02, PNorm = 71.1078, GNorm = 0.5036, lr_0 = 1.2675e-04
Loss = 3.1574e-02, PNorm = 71.1106, GNorm = 0.5821, lr_0 = 1.2666e-04
Loss = 3.3923e-02, PNorm = 71.1123, GNorm = 0.5829, lr_0 = 1.2658e-04
Loss = 3.3800e-02, PNorm = 71.1136, GNorm = 0.3574, lr_0 = 1.2649e-04
Loss = 2.5950e-02, PNorm = 71.1153, GNorm = 0.4396, lr_0 = 1.2640e-04
Loss = 2.8734e-02, PNorm = 71.1174, GNorm = 0.4721, lr_0 = 1.2632e-04
Loss = 3.4460e-02, PNorm = 71.1199, GNorm = 0.5634, lr_0 = 1.2623e-04
Loss = 3.0848e-02, PNorm = 71.1219, GNorm = 0.5076, lr_0 = 1.2614e-04
Loss = 2.9339e-02, PNorm = 71.1235, GNorm = 0.6228, lr_0 = 1.2606e-04
Loss = 2.8288e-02, PNorm = 71.1239, GNorm = 0.4832, lr_0 = 1.2597e-04
Loss = 3.3590e-02, PNorm = 71.1261, GNorm = 0.4406, lr_0 = 1.2588e-04
Loss = 3.5522e-02, PNorm = 71.1284, GNorm = 0.4171, lr_0 = 1.2580e-04
Loss = 2.8375e-02, PNorm = 71.1305, GNorm = 0.4571, lr_0 = 1.2571e-04
Loss = 3.1845e-02, PNorm = 71.1332, GNorm = 0.5251, lr_0 = 1.2563e-04
Loss = 3.2689e-02, PNorm = 71.1352, GNorm = 0.4302, lr_0 = 1.2554e-04
Loss = 2.9421e-02, PNorm = 71.1372, GNorm = 0.3369, lr_0 = 1.2545e-04
Loss = 3.3377e-02, PNorm = 71.1388, GNorm = 0.4499, lr_0 = 1.2537e-04
Loss = 3.4931e-02, PNorm = 71.1410, GNorm = 0.5139, lr_0 = 1.2528e-04
Loss = 3.4061e-02, PNorm = 71.1419, GNorm = 0.5023, lr_0 = 1.2520e-04
Loss = 3.1802e-02, PNorm = 71.1441, GNorm = 0.4600, lr_0 = 1.2511e-04
Loss = 3.1142e-02, PNorm = 71.1461, GNorm = 0.4120, lr_0 = 1.2502e-04
Loss = 3.2488e-02, PNorm = 71.1471, GNorm = 0.4186, lr_0 = 1.2494e-04
Loss = 3.5222e-02, PNorm = 71.1484, GNorm = 0.4434, lr_0 = 1.2485e-04
Loss = 2.9062e-02, PNorm = 71.1504, GNorm = 0.5562, lr_0 = 1.2477e-04
Loss = 3.1842e-02, PNorm = 71.1520, GNorm = 0.4163, lr_0 = 1.2468e-04
Loss = 3.6619e-02, PNorm = 71.1543, GNorm = 0.4870, lr_0 = 1.2460e-04
Loss = 2.9723e-02, PNorm = 71.1577, GNorm = 0.4407, lr_0 = 1.2451e-04
Loss = 2.7861e-02, PNorm = 71.1604, GNorm = 0.3534, lr_0 = 1.2443e-04
Loss = 3.1847e-02, PNorm = 71.1626, GNorm = 0.4954, lr_0 = 1.2434e-04
Loss = 2.9787e-02, PNorm = 71.1649, GNorm = 0.3768, lr_0 = 1.2426e-04
Loss = 3.4305e-02, PNorm = 71.1673, GNorm = 0.4864, lr_0 = 1.2417e-04
Loss = 3.1280e-02, PNorm = 71.1690, GNorm = 0.4425, lr_0 = 1.2409e-04
Loss = 3.0417e-02, PNorm = 71.1701, GNorm = 0.5570, lr_0 = 1.2400e-04
Loss = 2.7330e-02, PNorm = 71.1716, GNorm = 0.4078, lr_0 = 1.2392e-04
Loss = 3.2148e-02, PNorm = 71.1731, GNorm = 0.6877, lr_0 = 1.2383e-04
Loss = 2.8504e-02, PNorm = 71.1749, GNorm = 0.4354, lr_0 = 1.2375e-04
Loss = 3.1385e-02, PNorm = 71.1771, GNorm = 0.4649, lr_0 = 1.2366e-04
Loss = 3.3386e-02, PNorm = 71.1789, GNorm = 0.5076, lr_0 = 1.2358e-04
Loss = 3.1223e-02, PNorm = 71.1810, GNorm = 0.5451, lr_0 = 1.2349e-04
Loss = 3.5452e-02, PNorm = 71.1829, GNorm = 0.4766, lr_0 = 1.2341e-04
Loss = 3.0067e-02, PNorm = 71.1839, GNorm = 0.4647, lr_0 = 1.2332e-04
Loss = 2.5213e-02, PNorm = 71.1850, GNorm = 0.5388, lr_0 = 1.2324e-04
Loss = 3.0426e-02, PNorm = 71.1856, GNorm = 0.4363, lr_0 = 1.2315e-04
Loss = 3.4180e-02, PNorm = 71.1869, GNorm = 0.5920, lr_0 = 1.2307e-04
Loss = 3.4503e-02, PNorm = 71.1885, GNorm = 0.6104, lr_0 = 1.2298e-04
Loss = 3.4810e-02, PNorm = 71.1901, GNorm = 0.4403, lr_0 = 1.2290e-04
Loss = 3.9439e-02, PNorm = 71.1921, GNorm = 0.4361, lr_0 = 1.2282e-04
Loss = 2.9221e-02, PNorm = 71.1949, GNorm = 0.5982, lr_0 = 1.2273e-04
Loss = 3.5568e-02, PNorm = 71.1954, GNorm = 0.4754, lr_0 = 1.2265e-04
Loss = 3.1655e-02, PNorm = 71.1958, GNorm = 0.3925, lr_0 = 1.2256e-04
Loss = 3.0133e-02, PNorm = 71.1967, GNorm = 0.5806, lr_0 = 1.2248e-04
Loss = 3.4648e-02, PNorm = 71.1984, GNorm = 0.4532, lr_0 = 1.2240e-04
Loss = 3.4957e-02, PNorm = 71.2014, GNorm = 0.4714, lr_0 = 1.2231e-04
Loss = 3.1717e-02, PNorm = 71.2035, GNorm = 0.5584, lr_0 = 1.2223e-04
Loss = 2.8211e-02, PNorm = 71.2053, GNorm = 0.3541, lr_0 = 1.2214e-04
Loss = 3.6143e-02, PNorm = 71.2069, GNorm = 0.5885, lr_0 = 1.2206e-04
Loss = 3.2487e-02, PNorm = 71.2082, GNorm = 0.5895, lr_0 = 1.2198e-04
Loss = 3.2031e-02, PNorm = 71.2098, GNorm = 0.4724, lr_0 = 1.2189e-04
Loss = 3.6425e-02, PNorm = 71.2111, GNorm = 0.4961, lr_0 = 1.2181e-04
Loss = 3.4877e-02, PNorm = 71.2129, GNorm = 0.4665, lr_0 = 1.2173e-04
Loss = 3.1599e-02, PNorm = 71.2151, GNorm = 0.3978, lr_0 = 1.2164e-04
Loss = 3.0704e-02, PNorm = 71.2170, GNorm = 0.3607, lr_0 = 1.2156e-04
Loss = 3.1263e-02, PNorm = 71.2180, GNorm = 0.4558, lr_0 = 1.2148e-04
Loss = 3.2373e-02, PNorm = 71.2195, GNorm = 0.6827, lr_0 = 1.2139e-04
Loss = 3.5336e-02, PNorm = 71.2209, GNorm = 0.5160, lr_0 = 1.2131e-04
Loss = 3.2454e-02, PNorm = 71.2227, GNorm = 0.3757, lr_0 = 1.2123e-04
Loss = 3.3771e-02, PNorm = 71.2244, GNorm = 0.4806, lr_0 = 1.2114e-04
Loss = 3.4000e-02, PNorm = 71.2256, GNorm = 0.4177, lr_0 = 1.2106e-04
Loss = 3.0973e-02, PNorm = 71.2277, GNorm = 0.5854, lr_0 = 1.2098e-04
Loss = 3.1241e-02, PNorm = 71.2300, GNorm = 0.4790, lr_0 = 1.2090e-04
Loss = 3.1308e-02, PNorm = 71.2320, GNorm = 0.6761, lr_0 = 1.2081e-04
Loss = 3.5931e-02, PNorm = 71.2351, GNorm = 0.5207, lr_0 = 1.2073e-04
Loss = 3.5815e-02, PNorm = 71.2381, GNorm = 0.4565, lr_0 = 1.2065e-04
Loss = 3.3462e-02, PNorm = 71.2398, GNorm = 0.4237, lr_0 = 1.2056e-04
Loss = 3.3609e-02, PNorm = 71.2413, GNorm = 0.5086, lr_0 = 1.2048e-04
Loss = 3.1494e-02, PNorm = 71.2420, GNorm = 0.4587, lr_0 = 1.2040e-04
Loss = 3.8712e-02, PNorm = 71.2435, GNorm = 0.6337, lr_0 = 1.2032e-04
Loss = 3.4155e-02, PNorm = 71.2460, GNorm = 0.4972, lr_0 = 1.2023e-04
Loss = 3.1827e-02, PNorm = 71.2480, GNorm = 0.4540, lr_0 = 1.2015e-04
Loss = 3.4658e-02, PNorm = 71.2496, GNorm = 0.4454, lr_0 = 1.2007e-04
Loss = 3.3121e-02, PNorm = 71.2507, GNorm = 0.3451, lr_0 = 1.1999e-04
Loss = 3.4436e-02, PNorm = 71.2512, GNorm = 0.4402, lr_0 = 1.1991e-04
Loss = 2.8294e-02, PNorm = 71.2524, GNorm = 0.4583, lr_0 = 1.1982e-04
Loss = 3.1347e-02, PNorm = 71.2538, GNorm = 0.5478, lr_0 = 1.1974e-04
Loss = 3.2184e-02, PNorm = 71.2539, GNorm = 0.6527, lr_0 = 1.1966e-04
Loss = 3.1670e-02, PNorm = 71.2547, GNorm = 0.5071, lr_0 = 1.1958e-04
Loss = 3.6201e-02, PNorm = 71.2563, GNorm = 0.6467, lr_0 = 1.1950e-04
Loss = 3.5921e-02, PNorm = 71.2586, GNorm = 0.3666, lr_0 = 1.1941e-04
Loss = 3.7413e-02, PNorm = 71.2600, GNorm = 0.7078, lr_0 = 1.1933e-04
Loss = 3.5268e-02, PNorm = 71.2616, GNorm = 0.5403, lr_0 = 1.1925e-04
Loss = 3.4121e-02, PNorm = 71.2631, GNorm = 0.4809, lr_0 = 1.1917e-04
Loss = 3.4136e-02, PNorm = 71.2639, GNorm = 0.4827, lr_0 = 1.1909e-04
Loss = 3.2357e-02, PNorm = 71.2652, GNorm = 0.5377, lr_0 = 1.1901e-04
Loss = 3.5146e-02, PNorm = 71.2665, GNorm = 0.6171, lr_0 = 1.1892e-04
Loss = 3.2213e-02, PNorm = 71.2682, GNorm = 0.7336, lr_0 = 1.1884e-04
Loss = 3.7844e-02, PNorm = 71.2694, GNorm = 0.6850, lr_0 = 1.1876e-04
Loss = 3.1977e-02, PNorm = 71.2708, GNorm = 0.4788, lr_0 = 1.1868e-04
Loss = 3.5624e-02, PNorm = 71.2724, GNorm = 0.4432, lr_0 = 1.1860e-04
Loss = 3.6908e-02, PNorm = 71.2739, GNorm = 0.5144, lr_0 = 1.1852e-04
Loss = 2.8782e-02, PNorm = 71.2767, GNorm = 0.3670, lr_0 = 1.1844e-04
Loss = 3.1578e-02, PNorm = 71.2779, GNorm = 0.4191, lr_0 = 1.1835e-04
Loss = 3.4545e-02, PNorm = 71.2791, GNorm = 0.4749, lr_0 = 1.1827e-04
Loss = 3.6475e-02, PNorm = 71.2802, GNorm = 0.3910, lr_0 = 1.1819e-04
Loss = 3.3969e-02, PNorm = 71.2823, GNorm = 0.4678, lr_0 = 1.1811e-04
Loss = 3.4450e-02, PNorm = 71.2839, GNorm = 0.5975, lr_0 = 1.1803e-04
Loss = 3.3902e-02, PNorm = 71.2855, GNorm = 0.5897, lr_0 = 1.1795e-04
Loss = 3.3160e-02, PNorm = 71.2867, GNorm = 0.3438, lr_0 = 1.1787e-04
Validation mae = 0.387436
Epoch 28
Loss = 2.8113e-02, PNorm = 71.2889, GNorm = 0.4044, lr_0 = 1.1779e-04
Loss = 2.7733e-02, PNorm = 71.2910, GNorm = 0.4258, lr_0 = 1.1771e-04
Loss = 3.0886e-02, PNorm = 71.2932, GNorm = 0.5838, lr_0 = 1.1763e-04
Loss = 2.5322e-02, PNorm = 71.2950, GNorm = 0.3838, lr_0 = 1.1755e-04
Loss = 2.8109e-02, PNorm = 71.2968, GNorm = 0.5926, lr_0 = 1.1747e-04
Loss = 2.5594e-02, PNorm = 71.2982, GNorm = 0.4251, lr_0 = 1.1739e-04
Loss = 3.4004e-02, PNorm = 71.2998, GNorm = 0.4198, lr_0 = 1.1730e-04
Loss = 2.7255e-02, PNorm = 71.3015, GNorm = 0.3685, lr_0 = 1.1722e-04
Loss = 2.7570e-02, PNorm = 71.3038, GNorm = 0.5055, lr_0 = 1.1714e-04
Loss = 2.8551e-02, PNorm = 71.3059, GNorm = 0.4085, lr_0 = 1.1706e-04
Loss = 3.4167e-02, PNorm = 71.3071, GNorm = 0.5030, lr_0 = 1.1698e-04
Loss = 2.3211e-02, PNorm = 71.3086, GNorm = 0.4809, lr_0 = 1.1690e-04
Loss = 2.9244e-02, PNorm = 71.3101, GNorm = 0.4422, lr_0 = 1.1682e-04
Loss = 2.6806e-02, PNorm = 71.3116, GNorm = 0.4370, lr_0 = 1.1674e-04
Loss = 3.6954e-02, PNorm = 71.3131, GNorm = 0.3757, lr_0 = 1.1666e-04
Loss = 2.9204e-02, PNorm = 71.3142, GNorm = 0.5289, lr_0 = 1.1658e-04
Loss = 2.6431e-02, PNorm = 71.3157, GNorm = 0.4102, lr_0 = 1.1650e-04
Loss = 2.9796e-02, PNorm = 71.3173, GNorm = 0.3628, lr_0 = 1.1642e-04
Loss = 3.2856e-02, PNorm = 71.3195, GNorm = 0.4411, lr_0 = 1.1634e-04
Loss = 2.6472e-02, PNorm = 71.3219, GNorm = 0.5335, lr_0 = 1.1626e-04
Loss = 2.6069e-02, PNorm = 71.3237, GNorm = 0.5810, lr_0 = 1.1618e-04
Loss = 3.2123e-02, PNorm = 71.3244, GNorm = 0.6668, lr_0 = 1.1611e-04
Loss = 3.6085e-02, PNorm = 71.3260, GNorm = 0.5065, lr_0 = 1.1603e-04
Loss = 3.1397e-02, PNorm = 71.3276, GNorm = 0.6483, lr_0 = 1.1595e-04
Loss = 2.8660e-02, PNorm = 71.3299, GNorm = 0.3517, lr_0 = 1.1587e-04
Loss = 2.2786e-02, PNorm = 71.3316, GNorm = 0.3207, lr_0 = 1.1579e-04
Loss = 2.5731e-02, PNorm = 71.3326, GNorm = 0.3838, lr_0 = 1.1571e-04
Loss = 2.5810e-02, PNorm = 71.3336, GNorm = 0.4303, lr_0 = 1.1563e-04
Loss = 2.8619e-02, PNorm = 71.3348, GNorm = 0.3314, lr_0 = 1.1555e-04
Loss = 2.6366e-02, PNorm = 71.3358, GNorm = 0.4157, lr_0 = 1.1547e-04
Loss = 3.1824e-02, PNorm = 71.3373, GNorm = 0.4712, lr_0 = 1.1539e-04
Loss = 2.9547e-02, PNorm = 71.3384, GNorm = 0.4875, lr_0 = 1.1531e-04
Loss = 2.6768e-02, PNorm = 71.3395, GNorm = 0.5593, lr_0 = 1.1523e-04
Loss = 2.8946e-02, PNorm = 71.3399, GNorm = 0.3910, lr_0 = 1.1515e-04
Loss = 3.6100e-02, PNorm = 71.3410, GNorm = 0.5358, lr_0 = 1.1508e-04
Loss = 3.3587e-02, PNorm = 71.3431, GNorm = 0.7271, lr_0 = 1.1500e-04
Loss = 3.6945e-02, PNorm = 71.3458, GNorm = 0.3563, lr_0 = 1.1492e-04
Loss = 3.3457e-02, PNorm = 71.3483, GNorm = 0.9223, lr_0 = 1.1484e-04
Loss = 3.3767e-02, PNorm = 71.3512, GNorm = 0.4042, lr_0 = 1.1476e-04
Loss = 2.9783e-02, PNorm = 71.3530, GNorm = 0.4532, lr_0 = 1.1468e-04
Loss = 2.8400e-02, PNorm = 71.3546, GNorm = 0.3575, lr_0 = 1.1460e-04
Loss = 3.1429e-02, PNorm = 71.3555, GNorm = 0.6200, lr_0 = 1.1452e-04
Loss = 3.3708e-02, PNorm = 71.3574, GNorm = 0.6374, lr_0 = 1.1445e-04
Loss = 2.8987e-02, PNorm = 71.3585, GNorm = 0.4857, lr_0 = 1.1437e-04
Loss = 3.2670e-02, PNorm = 71.3597, GNorm = 0.4657, lr_0 = 1.1429e-04
Loss = 3.7251e-02, PNorm = 71.3612, GNorm = 0.5504, lr_0 = 1.1421e-04
Loss = 3.2973e-02, PNorm = 71.3626, GNorm = 0.4049, lr_0 = 1.1413e-04
Loss = 3.1926e-02, PNorm = 71.3641, GNorm = 0.3512, lr_0 = 1.1405e-04
Loss = 3.0626e-02, PNorm = 71.3653, GNorm = 0.6897, lr_0 = 1.1398e-04
Loss = 3.4582e-02, PNorm = 71.3661, GNorm = 0.3445, lr_0 = 1.1390e-04
Loss = 3.4762e-02, PNorm = 71.3684, GNorm = 0.3969, lr_0 = 1.1382e-04
Loss = 3.1927e-02, PNorm = 71.3703, GNorm = 0.5301, lr_0 = 1.1374e-04
Loss = 3.2289e-02, PNorm = 71.3714, GNorm = 0.5270, lr_0 = 1.1366e-04
Loss = 3.3467e-02, PNorm = 71.3722, GNorm = 0.5645, lr_0 = 1.1359e-04
Loss = 3.2331e-02, PNorm = 71.3738, GNorm = 0.4552, lr_0 = 1.1351e-04
Loss = 2.9274e-02, PNorm = 71.3759, GNorm = 0.5404, lr_0 = 1.1343e-04
Loss = 2.9657e-02, PNorm = 71.3766, GNorm = 0.4511, lr_0 = 1.1335e-04
Loss = 3.2009e-02, PNorm = 71.3774, GNorm = 0.3926, lr_0 = 1.1328e-04
Loss = 2.8218e-02, PNorm = 71.3794, GNorm = 0.4483, lr_0 = 1.1320e-04
Loss = 2.6843e-02, PNorm = 71.3822, GNorm = 0.6470, lr_0 = 1.1312e-04
Loss = 3.5600e-02, PNorm = 71.3843, GNorm = 0.4634, lr_0 = 1.1304e-04
Loss = 3.1325e-02, PNorm = 71.3864, GNorm = 0.5911, lr_0 = 1.1297e-04
Loss = 3.1156e-02, PNorm = 71.3883, GNorm = 0.3595, lr_0 = 1.1289e-04
Loss = 3.0711e-02, PNorm = 71.3894, GNorm = 0.7998, lr_0 = 1.1281e-04
Loss = 3.5044e-02, PNorm = 71.3914, GNorm = 0.4402, lr_0 = 1.1273e-04
Loss = 3.1752e-02, PNorm = 71.3940, GNorm = 0.4478, lr_0 = 1.1266e-04
Loss = 3.0727e-02, PNorm = 71.3960, GNorm = 0.3534, lr_0 = 1.1258e-04
Loss = 2.9838e-02, PNorm = 71.3969, GNorm = 0.6053, lr_0 = 1.1250e-04
Loss = 2.9116e-02, PNorm = 71.3979, GNorm = 0.3559, lr_0 = 1.1243e-04
Loss = 3.2812e-02, PNorm = 71.3990, GNorm = 0.5031, lr_0 = 1.1235e-04
Loss = 3.1132e-02, PNorm = 71.4006, GNorm = 0.6225, lr_0 = 1.1227e-04
Loss = 2.9327e-02, PNorm = 71.4027, GNorm = 0.5026, lr_0 = 1.1219e-04
Loss = 3.3722e-02, PNorm = 71.4037, GNorm = 0.6495, lr_0 = 1.1212e-04
Loss = 3.5777e-02, PNorm = 71.4055, GNorm = 0.3870, lr_0 = 1.1204e-04
Loss = 3.2989e-02, PNorm = 71.4073, GNorm = 0.4519, lr_0 = 1.1196e-04
Loss = 3.7388e-02, PNorm = 71.4092, GNorm = 0.4174, lr_0 = 1.1189e-04
Loss = 3.2929e-02, PNorm = 71.4115, GNorm = 0.4781, lr_0 = 1.1181e-04
Loss = 2.7965e-02, PNorm = 71.4131, GNorm = 0.6538, lr_0 = 1.1173e-04
Loss = 3.1811e-02, PNorm = 71.4155, GNorm = 0.5966, lr_0 = 1.1166e-04
Loss = 2.9953e-02, PNorm = 71.4183, GNorm = 0.5758, lr_0 = 1.1158e-04
Loss = 3.5166e-02, PNorm = 71.4207, GNorm = 0.4600, lr_0 = 1.1150e-04
Loss = 3.6096e-02, PNorm = 71.4221, GNorm = 0.7642, lr_0 = 1.1143e-04
Loss = 2.7208e-02, PNorm = 71.4234, GNorm = 0.4925, lr_0 = 1.1135e-04
Loss = 3.0220e-02, PNorm = 71.4245, GNorm = 0.4677, lr_0 = 1.1128e-04
Loss = 3.6076e-02, PNorm = 71.4260, GNorm = 0.5553, lr_0 = 1.1120e-04
Loss = 2.8566e-02, PNorm = 71.4265, GNorm = 0.3463, lr_0 = 1.1112e-04
Loss = 2.8654e-02, PNorm = 71.4270, GNorm = 0.5406, lr_0 = 1.1105e-04
Loss = 3.0732e-02, PNorm = 71.4281, GNorm = 0.5735, lr_0 = 1.1097e-04
Loss = 3.0014e-02, PNorm = 71.4287, GNorm = 0.6550, lr_0 = 1.1089e-04
Loss = 2.8131e-02, PNorm = 71.4289, GNorm = 0.6111, lr_0 = 1.1082e-04
Loss = 3.0139e-02, PNorm = 71.4296, GNorm = 0.5029, lr_0 = 1.1074e-04
Loss = 3.1514e-02, PNorm = 71.4307, GNorm = 0.5851, lr_0 = 1.1067e-04
Loss = 3.1659e-02, PNorm = 71.4332, GNorm = 0.6729, lr_0 = 1.1059e-04
Loss = 3.1925e-02, PNorm = 71.4348, GNorm = 0.5301, lr_0 = 1.1052e-04
Loss = 3.0848e-02, PNorm = 71.4360, GNorm = 0.4424, lr_0 = 1.1044e-04
Loss = 3.5678e-02, PNorm = 71.4372, GNorm = 0.8166, lr_0 = 1.1036e-04
Loss = 3.1694e-02, PNorm = 71.4393, GNorm = 0.3717, lr_0 = 1.1029e-04
Loss = 3.0115e-02, PNorm = 71.4416, GNorm = 0.4303, lr_0 = 1.1021e-04
Loss = 3.3263e-02, PNorm = 71.4435, GNorm = 0.5319, lr_0 = 1.1014e-04
Loss = 2.7470e-02, PNorm = 71.4445, GNorm = 0.5854, lr_0 = 1.1006e-04
Loss = 3.6125e-02, PNorm = 71.4464, GNorm = 0.4048, lr_0 = 1.0999e-04
Loss = 2.9209e-02, PNorm = 71.4485, GNorm = 0.5699, lr_0 = 1.0991e-04
Loss = 3.1288e-02, PNorm = 71.4504, GNorm = 0.5714, lr_0 = 1.0984e-04
Loss = 2.9874e-02, PNorm = 71.4519, GNorm = 0.5408, lr_0 = 1.0976e-04
Loss = 2.9854e-02, PNorm = 71.4535, GNorm = 0.4858, lr_0 = 1.0969e-04
Loss = 2.9928e-02, PNorm = 71.4555, GNorm = 0.4035, lr_0 = 1.0961e-04
Loss = 3.3739e-02, PNorm = 71.4569, GNorm = 0.4413, lr_0 = 1.0954e-04
Loss = 3.3147e-02, PNorm = 71.4581, GNorm = 0.4003, lr_0 = 1.0946e-04
Loss = 3.4618e-02, PNorm = 71.4591, GNorm = 0.4480, lr_0 = 1.0939e-04
Loss = 3.8592e-02, PNorm = 71.4607, GNorm = 0.4428, lr_0 = 1.0931e-04
Loss = 3.5049e-02, PNorm = 71.4624, GNorm = 0.4447, lr_0 = 1.0924e-04
Loss = 2.5917e-02, PNorm = 71.4647, GNorm = 0.3266, lr_0 = 1.0916e-04
Loss = 2.9873e-02, PNorm = 71.4668, GNorm = 0.4910, lr_0 = 1.0909e-04
Loss = 3.3191e-02, PNorm = 71.4687, GNorm = 0.5660, lr_0 = 1.0901e-04
Loss = 2.7378e-02, PNorm = 71.4700, GNorm = 0.5010, lr_0 = 1.0894e-04
Loss = 3.4384e-02, PNorm = 71.4718, GNorm = 0.4572, lr_0 = 1.0886e-04
Loss = 3.0693e-02, PNorm = 71.4732, GNorm = 0.4470, lr_0 = 1.0879e-04
Loss = 3.4316e-02, PNorm = 71.4742, GNorm = 0.5061, lr_0 = 1.0871e-04
Loss = 3.4422e-02, PNorm = 71.4761, GNorm = 0.7305, lr_0 = 1.0864e-04
Loss = 3.2306e-02, PNorm = 71.4773, GNorm = 0.5229, lr_0 = 1.0856e-04
Validation mae = 0.396892
Epoch 29
Loss = 3.2615e-02, PNorm = 71.4788, GNorm = 0.5432, lr_0 = 1.0849e-04
Loss = 3.2294e-02, PNorm = 71.4799, GNorm = 0.5361, lr_0 = 1.0841e-04
Loss = 2.7022e-02, PNorm = 71.4806, GNorm = 0.4587, lr_0 = 1.0834e-04
Loss = 2.6083e-02, PNorm = 71.4828, GNorm = 0.6235, lr_0 = 1.0827e-04
Loss = 2.9983e-02, PNorm = 71.4847, GNorm = 0.6827, lr_0 = 1.0819e-04
Loss = 2.4885e-02, PNorm = 71.4864, GNorm = 0.2859, lr_0 = 1.0812e-04
Loss = 2.9787e-02, PNorm = 71.4875, GNorm = 0.4007, lr_0 = 1.0804e-04
Loss = 2.7752e-02, PNorm = 71.4887, GNorm = 0.5320, lr_0 = 1.0797e-04
Loss = 2.4284e-02, PNorm = 71.4904, GNorm = 0.5431, lr_0 = 1.0790e-04
Loss = 2.7583e-02, PNorm = 71.4928, GNorm = 0.5020, lr_0 = 1.0782e-04
Loss = 2.7999e-02, PNorm = 71.4949, GNorm = 0.5955, lr_0 = 1.0775e-04
Loss = 2.4613e-02, PNorm = 71.4970, GNorm = 0.3780, lr_0 = 1.0767e-04
Loss = 2.3282e-02, PNorm = 71.4986, GNorm = 0.4259, lr_0 = 1.0760e-04
Loss = 2.4651e-02, PNorm = 71.5001, GNorm = 0.3432, lr_0 = 1.0753e-04
Loss = 2.4611e-02, PNorm = 71.5013, GNorm = 0.4627, lr_0 = 1.0745e-04
Loss = 3.0596e-02, PNorm = 71.5027, GNorm = 0.4069, lr_0 = 1.0738e-04
Loss = 3.1819e-02, PNorm = 71.5043, GNorm = 0.4362, lr_0 = 1.0731e-04
Loss = 3.8780e-02, PNorm = 71.5067, GNorm = 0.4711, lr_0 = 1.0723e-04
Loss = 2.7327e-02, PNorm = 71.5089, GNorm = 0.3946, lr_0 = 1.0716e-04
Loss = 2.7307e-02, PNorm = 71.5102, GNorm = 0.4444, lr_0 = 1.0709e-04
Loss = 2.9141e-02, PNorm = 71.5117, GNorm = 0.4584, lr_0 = 1.0701e-04
Loss = 3.3865e-02, PNorm = 71.5137, GNorm = 0.3994, lr_0 = 1.0694e-04
Loss = 3.0832e-02, PNorm = 71.5150, GNorm = 0.4372, lr_0 = 1.0687e-04
Loss = 2.6706e-02, PNorm = 71.5164, GNorm = 0.4742, lr_0 = 1.0679e-04
Loss = 2.8382e-02, PNorm = 71.5171, GNorm = 0.4712, lr_0 = 1.0672e-04
Loss = 2.5041e-02, PNorm = 71.5178, GNorm = 0.4934, lr_0 = 1.0665e-04
Loss = 2.9178e-02, PNorm = 71.5194, GNorm = 0.4201, lr_0 = 1.0657e-04
Loss = 2.7937e-02, PNorm = 71.5206, GNorm = 0.4241, lr_0 = 1.0650e-04
Loss = 3.3641e-02, PNorm = 71.5217, GNorm = 0.6579, lr_0 = 1.0643e-04
Loss = 3.2486e-02, PNorm = 71.5244, GNorm = 0.4363, lr_0 = 1.0635e-04
Loss = 2.7862e-02, PNorm = 71.5261, GNorm = 0.5498, lr_0 = 1.0628e-04
Loss = 2.9684e-02, PNorm = 71.5267, GNorm = 0.4708, lr_0 = 1.0621e-04
Loss = 2.8150e-02, PNorm = 71.5274, GNorm = 0.3491, lr_0 = 1.0614e-04
Loss = 2.9027e-02, PNorm = 71.5295, GNorm = 0.4397, lr_0 = 1.0606e-04
Loss = 2.9832e-02, PNorm = 71.5312, GNorm = 0.3816, lr_0 = 1.0599e-04
Loss = 2.7055e-02, PNorm = 71.5328, GNorm = 0.6468, lr_0 = 1.0592e-04
Loss = 2.4878e-02, PNorm = 71.5342, GNorm = 0.4155, lr_0 = 1.0585e-04
Loss = 3.3780e-02, PNorm = 71.5354, GNorm = 0.4665, lr_0 = 1.0577e-04
Loss = 2.9241e-02, PNorm = 71.5372, GNorm = 0.6031, lr_0 = 1.0570e-04
Loss = 3.1652e-02, PNorm = 71.5393, GNorm = 0.3720, lr_0 = 1.0563e-04
Loss = 3.1319e-02, PNorm = 71.5407, GNorm = 0.6231, lr_0 = 1.0556e-04
Loss = 2.8980e-02, PNorm = 71.5427, GNorm = 0.5088, lr_0 = 1.0548e-04
Loss = 3.1248e-02, PNorm = 71.5443, GNorm = 0.4717, lr_0 = 1.0541e-04
Loss = 2.6696e-02, PNorm = 71.5463, GNorm = 0.3989, lr_0 = 1.0534e-04
Loss = 2.4327e-02, PNorm = 71.5477, GNorm = 0.3318, lr_0 = 1.0527e-04
Loss = 2.9909e-02, PNorm = 71.5492, GNorm = 0.3776, lr_0 = 1.0519e-04
Loss = 2.9025e-02, PNorm = 71.5510, GNorm = 0.3998, lr_0 = 1.0512e-04
Loss = 3.0719e-02, PNorm = 71.5520, GNorm = 0.3854, lr_0 = 1.0505e-04
Loss = 2.6523e-02, PNorm = 71.5531, GNorm = 0.4422, lr_0 = 1.0498e-04
Loss = 2.8442e-02, PNorm = 71.5536, GNorm = 0.4480, lr_0 = 1.0491e-04
Loss = 2.8698e-02, PNorm = 71.5549, GNorm = 0.4134, lr_0 = 1.0483e-04
Loss = 3.0568e-02, PNorm = 71.5563, GNorm = 0.4343, lr_0 = 1.0476e-04
Loss = 3.4429e-02, PNorm = 71.5581, GNorm = 0.4320, lr_0 = 1.0469e-04
Loss = 3.2122e-02, PNorm = 71.5597, GNorm = 0.6090, lr_0 = 1.0462e-04
Loss = 3.0055e-02, PNorm = 71.5614, GNorm = 0.4910, lr_0 = 1.0455e-04
Loss = 3.5575e-02, PNorm = 71.5625, GNorm = 0.5318, lr_0 = 1.0448e-04
Loss = 2.6619e-02, PNorm = 71.5634, GNorm = 0.4902, lr_0 = 1.0440e-04
Loss = 3.1811e-02, PNorm = 71.5646, GNorm = 0.4855, lr_0 = 1.0433e-04
Loss = 2.9167e-02, PNorm = 71.5657, GNorm = 0.4926, lr_0 = 1.0426e-04
Loss = 3.0996e-02, PNorm = 71.5665, GNorm = 0.4194, lr_0 = 1.0419e-04
Loss = 3.0833e-02, PNorm = 71.5681, GNorm = 0.4529, lr_0 = 1.0412e-04
Loss = 2.6795e-02, PNorm = 71.5693, GNorm = 0.9023, lr_0 = 1.0405e-04
Loss = 3.1735e-02, PNorm = 71.5713, GNorm = 0.6801, lr_0 = 1.0398e-04
Loss = 2.7930e-02, PNorm = 71.5726, GNorm = 0.4454, lr_0 = 1.0391e-04
Loss = 3.4850e-02, PNorm = 71.5737, GNorm = 0.4531, lr_0 = 1.0383e-04
Loss = 2.9618e-02, PNorm = 71.5758, GNorm = 0.4090, lr_0 = 1.0376e-04
Loss = 3.1932e-02, PNorm = 71.5778, GNorm = 0.4925, lr_0 = 1.0369e-04
Loss = 3.1577e-02, PNorm = 71.5803, GNorm = 0.4636, lr_0 = 1.0362e-04
Loss = 2.4365e-02, PNorm = 71.5816, GNorm = 0.3236, lr_0 = 1.0355e-04
Loss = 2.6260e-02, PNorm = 71.5822, GNorm = 0.5331, lr_0 = 1.0348e-04
Loss = 2.9578e-02, PNorm = 71.5825, GNorm = 0.4738, lr_0 = 1.0341e-04
Loss = 3.1568e-02, PNorm = 71.5829, GNorm = 0.4668, lr_0 = 1.0334e-04
Loss = 2.9828e-02, PNorm = 71.5841, GNorm = 0.4230, lr_0 = 1.0327e-04
Loss = 3.0472e-02, PNorm = 71.5853, GNorm = 0.4197, lr_0 = 1.0320e-04
Loss = 3.0010e-02, PNorm = 71.5870, GNorm = 0.4818, lr_0 = 1.0312e-04
Loss = 2.8970e-02, PNorm = 71.5887, GNorm = 0.5320, lr_0 = 1.0305e-04
Loss = 3.3030e-02, PNorm = 71.5908, GNorm = 0.5044, lr_0 = 1.0298e-04
Loss = 3.7309e-02, PNorm = 71.5929, GNorm = 0.3747, lr_0 = 1.0291e-04
Loss = 3.0158e-02, PNorm = 71.5946, GNorm = 0.3977, lr_0 = 1.0284e-04
Loss = 3.2006e-02, PNorm = 71.5964, GNorm = 0.5797, lr_0 = 1.0277e-04
Loss = 3.0151e-02, PNorm = 71.5980, GNorm = 0.5140, lr_0 = 1.0270e-04
Loss = 2.7479e-02, PNorm = 71.5996, GNorm = 0.3774, lr_0 = 1.0263e-04
Loss = 3.2722e-02, PNorm = 71.6019, GNorm = 0.4662, lr_0 = 1.0256e-04
Loss = 2.5976e-02, PNorm = 71.6034, GNorm = 0.3947, lr_0 = 1.0249e-04
Loss = 3.9357e-02, PNorm = 71.6052, GNorm = 0.5584, lr_0 = 1.0242e-04
Loss = 3.3853e-02, PNorm = 71.6066, GNorm = 0.6058, lr_0 = 1.0235e-04
Loss = 2.8649e-02, PNorm = 71.6089, GNorm = 0.4605, lr_0 = 1.0228e-04
Loss = 2.6038e-02, PNorm = 71.6100, GNorm = 0.3473, lr_0 = 1.0221e-04
Loss = 2.8151e-02, PNorm = 71.6102, GNorm = 0.4641, lr_0 = 1.0214e-04
Loss = 2.8866e-02, PNorm = 71.6109, GNorm = 0.5713, lr_0 = 1.0207e-04
Loss = 3.2861e-02, PNorm = 71.6127, GNorm = 0.4508, lr_0 = 1.0200e-04
Loss = 2.9553e-02, PNorm = 71.6149, GNorm = 0.4477, lr_0 = 1.0193e-04
Loss = 3.1804e-02, PNorm = 71.6167, GNorm = 0.5921, lr_0 = 1.0186e-04
Loss = 3.2601e-02, PNorm = 71.6181, GNorm = 0.4104, lr_0 = 1.0179e-04
Loss = 2.8814e-02, PNorm = 71.6189, GNorm = 0.5297, lr_0 = 1.0172e-04
Loss = 2.6307e-02, PNorm = 71.6191, GNorm = 0.4195, lr_0 = 1.0165e-04
Loss = 2.9490e-02, PNorm = 71.6199, GNorm = 0.5417, lr_0 = 1.0158e-04
Loss = 3.0591e-02, PNorm = 71.6213, GNorm = 0.4226, lr_0 = 1.0151e-04
Loss = 2.7262e-02, PNorm = 71.6221, GNorm = 0.6592, lr_0 = 1.0144e-04
Loss = 3.1507e-02, PNorm = 71.6238, GNorm = 0.4729, lr_0 = 1.0137e-04
Loss = 2.9971e-02, PNorm = 71.6254, GNorm = 0.4042, lr_0 = 1.0130e-04
Loss = 2.8406e-02, PNorm = 71.6264, GNorm = 0.4637, lr_0 = 1.0123e-04
Loss = 3.2573e-02, PNorm = 71.6269, GNorm = 0.3838, lr_0 = 1.0116e-04
Loss = 3.5086e-02, PNorm = 71.6279, GNorm = 0.8563, lr_0 = 1.0110e-04
Loss = 3.3356e-02, PNorm = 71.6296, GNorm = 0.4275, lr_0 = 1.0103e-04
Loss = 2.8930e-02, PNorm = 71.6315, GNorm = 0.4600, lr_0 = 1.0096e-04
Loss = 2.9619e-02, PNorm = 71.6337, GNorm = 0.4044, lr_0 = 1.0089e-04
Loss = 3.1524e-02, PNorm = 71.6349, GNorm = 0.6739, lr_0 = 1.0082e-04
Loss = 2.8630e-02, PNorm = 71.6353, GNorm = 0.4561, lr_0 = 1.0075e-04
Loss = 2.9771e-02, PNorm = 71.6362, GNorm = 0.5576, lr_0 = 1.0068e-04
Loss = 2.4719e-02, PNorm = 71.6365, GNorm = 0.4655, lr_0 = 1.0061e-04
Loss = 2.5641e-02, PNorm = 71.6366, GNorm = 0.4911, lr_0 = 1.0054e-04
Loss = 3.0300e-02, PNorm = 71.6377, GNorm = 0.6281, lr_0 = 1.0047e-04
Loss = 3.2866e-02, PNorm = 71.6389, GNorm = 0.4865, lr_0 = 1.0041e-04
Loss = 3.1019e-02, PNorm = 71.6407, GNorm = 0.6019, lr_0 = 1.0034e-04
Loss = 2.6201e-02, PNorm = 71.6418, GNorm = 0.5550, lr_0 = 1.0027e-04
Loss = 3.0393e-02, PNorm = 71.6427, GNorm = 0.6567, lr_0 = 1.0020e-04
Loss = 3.0164e-02, PNorm = 71.6430, GNorm = 0.4864, lr_0 = 1.0013e-04
Loss = 3.0255e-02, PNorm = 71.6431, GNorm = 0.6024, lr_0 = 1.0006e-04
Loss = 3.3321e-02, PNorm = 71.6439, GNorm = 0.6069, lr_0 = 1.0000e-04
Validation mae = 0.393244
Model 0 best validation mae = 0.386821 on epoch 17
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.381340
Ensemble test mae = 0.381340
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 8.6313e-01, PNorm = 38.3710, GNorm = 3.9380, lr_0 = 1.0413e-04
Loss = 8.5021e-01, PNorm = 38.3734, GNorm = 7.0193, lr_0 = 1.0788e-04
Loss = 7.7811e-01, PNorm = 38.3752, GNorm = 6.1883, lr_0 = 1.1163e-04
Loss = 7.3780e-01, PNorm = 38.3774, GNorm = 1.7174, lr_0 = 1.1537e-04
Loss = 7.4363e-01, PNorm = 38.3809, GNorm = 2.4036, lr_0 = 1.1913e-04
Loss = 7.1426e-01, PNorm = 38.3846, GNorm = 1.8123, lr_0 = 1.2287e-04
Loss = 6.5226e-01, PNorm = 38.3890, GNorm = 1.1341, lr_0 = 1.2663e-04
Loss = 5.7684e-01, PNorm = 38.3943, GNorm = 2.8897, lr_0 = 1.3038e-04
Loss = 6.0243e-01, PNorm = 38.3999, GNorm = 8.9633, lr_0 = 1.3413e-04
Loss = 5.8705e-01, PNorm = 38.4064, GNorm = 10.1422, lr_0 = 1.3788e-04
Loss = 5.7728e-01, PNorm = 38.4124, GNorm = 18.6443, lr_0 = 1.4163e-04
Loss = 5.4088e-01, PNorm = 38.4165, GNorm = 11.2882, lr_0 = 1.4537e-04
Loss = 4.4824e-01, PNorm = 38.4221, GNorm = 5.6312, lr_0 = 1.4913e-04
Loss = 4.4142e-01, PNorm = 38.4285, GNorm = 6.7565, lr_0 = 1.5288e-04
Loss = 4.6024e-01, PNorm = 38.4349, GNorm = 4.6529, lr_0 = 1.5662e-04
Loss = 4.7126e-01, PNorm = 38.4395, GNorm = 4.2021, lr_0 = 1.6038e-04
Loss = 5.3670e-01, PNorm = 38.4439, GNorm = 14.2617, lr_0 = 1.6412e-04
Loss = 4.8876e-01, PNorm = 38.4499, GNorm = 8.6721, lr_0 = 1.6788e-04
Loss = 5.4000e-01, PNorm = 38.4560, GNorm = 6.0310, lr_0 = 1.7163e-04
Loss = 5.1711e-01, PNorm = 38.4620, GNorm = 1.9119, lr_0 = 1.7538e-04
Loss = 3.8537e-01, PNorm = 38.4677, GNorm = 10.0480, lr_0 = 1.7913e-04
Loss = 4.7024e-01, PNorm = 38.4741, GNorm = 24.7743, lr_0 = 1.8288e-04
Loss = 4.2060e-01, PNorm = 38.4798, GNorm = 5.0732, lr_0 = 1.8662e-04
Loss = 3.9117e-01, PNorm = 38.4870, GNorm = 9.5136, lr_0 = 1.9038e-04
Loss = 3.8907e-01, PNorm = 38.4937, GNorm = 13.2712, lr_0 = 1.9413e-04
Loss = 4.5340e-01, PNorm = 38.4981, GNorm = 19.3719, lr_0 = 1.9788e-04
Loss = 5.2374e-01, PNorm = 38.5020, GNorm = 10.2106, lr_0 = 2.0163e-04
Loss = 5.1025e-01, PNorm = 38.5082, GNorm = 8.0377, lr_0 = 2.0537e-04
Loss = 3.7177e-01, PNorm = 38.5154, GNorm = 5.6517, lr_0 = 2.0913e-04
Loss = 3.8711e-01, PNorm = 38.5227, GNorm = 5.1377, lr_0 = 2.1288e-04
Loss = 3.8330e-01, PNorm = 38.5301, GNorm = 15.0518, lr_0 = 2.1663e-04
Loss = 3.4214e-01, PNorm = 38.5379, GNorm = 4.5100, lr_0 = 2.2038e-04
Loss = 4.3613e-01, PNorm = 38.5463, GNorm = 21.5257, lr_0 = 2.2412e-04
Loss = 3.4980e-01, PNorm = 38.5530, GNorm = 8.7220, lr_0 = 2.2787e-04
Loss = 3.7651e-01, PNorm = 38.5611, GNorm = 15.7398, lr_0 = 2.3163e-04
Loss = 3.9111e-01, PNorm = 38.5661, GNorm = 24.6990, lr_0 = 2.3538e-04
Loss = 3.4719e-01, PNorm = 38.5703, GNorm = 15.5302, lr_0 = 2.3913e-04
Loss = 3.6489e-01, PNorm = 38.5772, GNorm = 21.4029, lr_0 = 2.4288e-04
Loss = 4.3648e-01, PNorm = 38.5821, GNorm = 13.5248, lr_0 = 2.4662e-04
Loss = 4.5837e-01, PNorm = 38.5875, GNorm = 2.7837, lr_0 = 2.5038e-04
Loss = 4.1871e-01, PNorm = 38.5950, GNorm = 2.2523, lr_0 = 2.5413e-04
Loss = 3.9383e-01, PNorm = 38.6018, GNorm = 18.5607, lr_0 = 2.5788e-04
Loss = 3.8920e-01, PNorm = 38.6057, GNorm = 3.9127, lr_0 = 2.6163e-04
Loss = 3.3590e-01, PNorm = 38.6145, GNorm = 3.3421, lr_0 = 2.6537e-04
Loss = 3.5488e-01, PNorm = 38.6236, GNorm = 5.6380, lr_0 = 2.6912e-04
Loss = 3.1520e-01, PNorm = 38.6345, GNorm = 12.9258, lr_0 = 2.7288e-04
Loss = 3.2579e-01, PNorm = 38.6417, GNorm = 2.0074, lr_0 = 2.7663e-04
Loss = 3.6586e-01, PNorm = 38.6458, GNorm = 20.9219, lr_0 = 2.8038e-04
Loss = 3.6577e-01, PNorm = 38.6516, GNorm = 12.8312, lr_0 = 2.8413e-04
Loss = 3.0673e-01, PNorm = 38.6584, GNorm = 3.0339, lr_0 = 2.8787e-04
Loss = 3.2558e-01, PNorm = 38.6635, GNorm = 1.3452, lr_0 = 2.9163e-04
Loss = 3.5546e-01, PNorm = 38.6705, GNorm = 9.5515, lr_0 = 2.9538e-04
Loss = 4.0740e-01, PNorm = 38.6761, GNorm = 19.1390, lr_0 = 2.9913e-04
Loss = 3.6908e-01, PNorm = 38.6806, GNorm = 12.9356, lr_0 = 3.0288e-04
Loss = 3.7818e-01, PNorm = 38.6875, GNorm = 5.3900, lr_0 = 3.0662e-04
Loss = 3.5171e-01, PNorm = 38.6960, GNorm = 6.6281, lr_0 = 3.1037e-04
Loss = 3.7351e-01, PNorm = 38.7049, GNorm = 1.9233, lr_0 = 3.1413e-04
Loss = 2.9389e-01, PNorm = 38.7129, GNorm = 11.6798, lr_0 = 3.1788e-04
Loss = 2.8374e-01, PNorm = 38.7190, GNorm = 7.2138, lr_0 = 3.2163e-04
Loss = 3.0609e-01, PNorm = 38.7255, GNorm = 1.8146, lr_0 = 3.2538e-04
Loss = 3.2895e-01, PNorm = 38.7321, GNorm = 18.2220, lr_0 = 3.2912e-04
Loss = 3.5742e-01, PNorm = 38.7403, GNorm = 2.8670, lr_0 = 3.3288e-04
Loss = 3.2106e-01, PNorm = 38.7509, GNorm = 7.0743, lr_0 = 3.3663e-04
Loss = 3.6135e-01, PNorm = 38.7567, GNorm = 15.3281, lr_0 = 3.4038e-04
Loss = 3.0794e-01, PNorm = 38.7635, GNorm = 18.1192, lr_0 = 3.4413e-04
Loss = 2.9867e-01, PNorm = 38.7741, GNorm = 1.9658, lr_0 = 3.4787e-04
Loss = 3.4567e-01, PNorm = 38.7810, GNorm = 6.0817, lr_0 = 3.5162e-04
Loss = 3.3648e-01, PNorm = 38.7890, GNorm = 12.7283, lr_0 = 3.5538e-04
Loss = 4.2789e-01, PNorm = 38.7961, GNorm = 14.4833, lr_0 = 3.5913e-04
Loss = 4.1403e-01, PNorm = 38.8038, GNorm = 7.9779, lr_0 = 3.6288e-04
Loss = 4.0126e-01, PNorm = 38.8138, GNorm = 6.2198, lr_0 = 3.6662e-04
Loss = 3.5648e-01, PNorm = 38.8268, GNorm = 3.8112, lr_0 = 3.7037e-04
Loss = 2.9818e-01, PNorm = 38.8393, GNorm = 2.1796, lr_0 = 3.7413e-04
Loss = 2.9413e-01, PNorm = 38.8529, GNorm = 4.5535, lr_0 = 3.7788e-04
Loss = 3.1105e-01, PNorm = 38.8630, GNorm = 8.8090, lr_0 = 3.8163e-04
Loss = 2.9739e-01, PNorm = 38.8729, GNorm = 3.8283, lr_0 = 3.8537e-04
Loss = 3.8090e-01, PNorm = 38.8769, GNorm = 11.2883, lr_0 = 3.8912e-04
Loss = 3.2212e-01, PNorm = 38.8816, GNorm = 7.7200, lr_0 = 3.9287e-04
Loss = 3.4636e-01, PNorm = 38.8894, GNorm = 8.7389, lr_0 = 3.9663e-04
Loss = 3.1280e-01, PNorm = 38.8995, GNorm = 3.3675, lr_0 = 4.0038e-04
Loss = 3.5051e-01, PNorm = 38.9097, GNorm = 6.5097, lr_0 = 4.0413e-04
Loss = 3.1720e-01, PNorm = 38.9225, GNorm = 2.1541, lr_0 = 4.0787e-04
Loss = 3.3163e-01, PNorm = 38.9308, GNorm = 9.0274, lr_0 = 4.1162e-04
Loss = 3.1401e-01, PNorm = 38.9390, GNorm = 5.9087, lr_0 = 4.1537e-04
Loss = 2.5099e-01, PNorm = 38.9493, GNorm = 1.5110, lr_0 = 4.1913e-04
Loss = 3.2450e-01, PNorm = 38.9614, GNorm = 1.4684, lr_0 = 4.2288e-04
Loss = 2.9790e-01, PNorm = 38.9680, GNorm = 15.1942, lr_0 = 4.2662e-04
Loss = 3.0770e-01, PNorm = 38.9780, GNorm = 6.8405, lr_0 = 4.3037e-04
Loss = 3.2725e-01, PNorm = 38.9891, GNorm = 3.7977, lr_0 = 4.3412e-04
Loss = 2.5273e-01, PNorm = 39.0011, GNorm = 2.7044, lr_0 = 4.3788e-04
Loss = 2.7795e-01, PNorm = 39.0082, GNorm = 4.0444, lr_0 = 4.4163e-04
Loss = 2.9362e-01, PNorm = 39.0198, GNorm = 7.1499, lr_0 = 4.4538e-04
Loss = 3.4175e-01, PNorm = 39.0339, GNorm = 14.4980, lr_0 = 4.4912e-04
Loss = 3.0659e-01, PNorm = 39.0472, GNorm = 2.0274, lr_0 = 4.5287e-04
Loss = 2.6988e-01, PNorm = 39.0611, GNorm = 2.3464, lr_0 = 4.5662e-04
Loss = 3.8077e-01, PNorm = 39.0683, GNorm = 10.9147, lr_0 = 4.6038e-04
Loss = 2.9548e-01, PNorm = 39.0784, GNorm = 4.0535, lr_0 = 4.6413e-04
Loss = 2.7245e-01, PNorm = 39.0909, GNorm = 4.5481, lr_0 = 4.6787e-04
Loss = 2.8421e-01, PNorm = 39.1044, GNorm = 3.8581, lr_0 = 4.7162e-04
Loss = 2.9175e-01, PNorm = 39.1146, GNorm = 2.1295, lr_0 = 4.7537e-04
Loss = 2.8282e-01, PNorm = 39.1242, GNorm = 5.7898, lr_0 = 4.7913e-04
Loss = 3.5003e-01, PNorm = 39.1386, GNorm = 1.3591, lr_0 = 4.8288e-04
Loss = 2.6683e-01, PNorm = 39.1479, GNorm = 6.4149, lr_0 = 4.8663e-04
Loss = 3.1378e-01, PNorm = 39.1643, GNorm = 4.2785, lr_0 = 4.9038e-04
Loss = 2.9025e-01, PNorm = 39.1762, GNorm = 1.6940, lr_0 = 4.9412e-04
Loss = 3.0118e-01, PNorm = 39.1907, GNorm = 7.0169, lr_0 = 4.9788e-04
Loss = 3.2366e-01, PNorm = 39.1984, GNorm = 4.3176, lr_0 = 5.0163e-04
Loss = 3.0593e-01, PNorm = 39.2089, GNorm = 7.3970, lr_0 = 5.0538e-04
Loss = 3.4519e-01, PNorm = 39.2187, GNorm = 5.0626, lr_0 = 5.0913e-04
Loss = 2.8386e-01, PNorm = 39.2307, GNorm = 5.2953, lr_0 = 5.1287e-04
Loss = 2.7476e-01, PNorm = 39.2444, GNorm = 1.4822, lr_0 = 5.1663e-04
Loss = 2.5926e-01, PNorm = 39.2603, GNorm = 4.3354, lr_0 = 5.2038e-04
Loss = 2.7576e-01, PNorm = 39.2648, GNorm = 6.6555, lr_0 = 5.2413e-04
Loss = 3.1314e-01, PNorm = 39.2743, GNorm = 17.0951, lr_0 = 5.2788e-04
Loss = 3.4229e-01, PNorm = 39.2846, GNorm = 11.0081, lr_0 = 5.3162e-04
Loss = 3.1285e-01, PNorm = 39.3002, GNorm = 4.2430, lr_0 = 5.3538e-04
Loss = 2.9411e-01, PNorm = 39.3184, GNorm = 6.0092, lr_0 = 5.3912e-04
Loss = 2.6396e-01, PNorm = 39.3347, GNorm = 3.7669, lr_0 = 5.4288e-04
Loss = 2.4754e-01, PNorm = 39.3504, GNorm = 1.5646, lr_0 = 5.4663e-04
Loss = 2.7944e-01, PNorm = 39.3604, GNorm = 1.9928, lr_0 = 5.5038e-04
Validation mae = 0.582772
Epoch 1
Loss = 2.5190e-01, PNorm = 39.3744, GNorm = 8.3784, lr_0 = 5.5413e-04
Loss = 2.9808e-01, PNorm = 39.3886, GNorm = 1.2463, lr_0 = 5.5787e-04
Loss = 2.9856e-01, PNorm = 39.4046, GNorm = 1.7633, lr_0 = 5.6163e-04
Loss = 2.4089e-01, PNorm = 39.4223, GNorm = 3.5105, lr_0 = 5.6538e-04
Loss = 2.5753e-01, PNorm = 39.4342, GNorm = 1.8356, lr_0 = 5.6913e-04
Loss = 2.3250e-01, PNorm = 39.4502, GNorm = 5.9349, lr_0 = 5.7288e-04
Loss = 3.5725e-01, PNorm = 39.4670, GNorm = 2.7128, lr_0 = 5.7662e-04
Loss = 4.3808e-01, PNorm = 39.4792, GNorm = 9.9560, lr_0 = 5.8038e-04
Loss = 3.5629e-01, PNorm = 39.5028, GNorm = 1.4301, lr_0 = 5.8413e-04
Loss = 3.2449e-01, PNorm = 39.5269, GNorm = 4.4148, lr_0 = 5.8788e-04
Loss = 2.9543e-01, PNorm = 39.5475, GNorm = 4.6154, lr_0 = 5.9163e-04
Loss = 2.9545e-01, PNorm = 39.5646, GNorm = 1.5707, lr_0 = 5.9538e-04
Loss = 2.7647e-01, PNorm = 39.5813, GNorm = 1.0265, lr_0 = 5.9913e-04
Loss = 2.3422e-01, PNorm = 39.5905, GNorm = 4.7335, lr_0 = 6.0288e-04
Loss = 2.3863e-01, PNorm = 39.6002, GNorm = 6.9132, lr_0 = 6.0663e-04
Loss = 2.7858e-01, PNorm = 39.6165, GNorm = 1.1841, lr_0 = 6.1038e-04
Loss = 2.7263e-01, PNorm = 39.6317, GNorm = 2.6294, lr_0 = 6.1413e-04
Loss = 2.7303e-01, PNorm = 39.6463, GNorm = 1.9121, lr_0 = 6.1788e-04
Loss = 2.5988e-01, PNorm = 39.6591, GNorm = 3.4889, lr_0 = 6.2163e-04
Loss = 2.7199e-01, PNorm = 39.6719, GNorm = 9.9995, lr_0 = 6.2538e-04
Loss = 2.9063e-01, PNorm = 39.6864, GNorm = 1.6343, lr_0 = 6.2913e-04
Loss = 2.6677e-01, PNorm = 39.7055, GNorm = 3.1056, lr_0 = 6.3288e-04
Loss = 2.4857e-01, PNorm = 39.7203, GNorm = 5.4436, lr_0 = 6.3663e-04
Loss = 2.6397e-01, PNorm = 39.7350, GNorm = 2.9375, lr_0 = 6.4038e-04
Loss = 2.5772e-01, PNorm = 39.7483, GNorm = 2.8906, lr_0 = 6.4413e-04
Loss = 2.4235e-01, PNorm = 39.7640, GNorm = 5.5373, lr_0 = 6.4788e-04
Loss = 2.4488e-01, PNorm = 39.7760, GNorm = 11.6204, lr_0 = 6.5163e-04
Loss = 2.5433e-01, PNorm = 39.7900, GNorm = 1.9679, lr_0 = 6.5538e-04
Loss = 2.5911e-01, PNorm = 39.8091, GNorm = 2.9022, lr_0 = 6.5913e-04
Loss = 2.6621e-01, PNorm = 39.8237, GNorm = 5.2279, lr_0 = 6.6288e-04
Loss = 2.9278e-01, PNorm = 39.8406, GNorm = 9.0157, lr_0 = 6.6663e-04
Loss = 2.2344e-01, PNorm = 39.8563, GNorm = 3.0160, lr_0 = 6.7038e-04
Loss = 2.4085e-01, PNorm = 39.8760, GNorm = 0.9794, lr_0 = 6.7413e-04
Loss = 2.5846e-01, PNorm = 39.8904, GNorm = 8.6730, lr_0 = 6.7788e-04
Loss = 2.5161e-01, PNorm = 39.8995, GNorm = 0.9313, lr_0 = 6.8163e-04
Loss = 2.4398e-01, PNorm = 39.9175, GNorm = 2.4665, lr_0 = 6.8538e-04
Loss = 2.6777e-01, PNorm = 39.9394, GNorm = 1.4063, lr_0 = 6.8913e-04
Loss = 2.7594e-01, PNorm = 39.9566, GNorm = 5.1709, lr_0 = 6.9288e-04
Loss = 2.7828e-01, PNorm = 39.9776, GNorm = 5.6196, lr_0 = 6.9663e-04
Loss = 2.1822e-01, PNorm = 40.0019, GNorm = 1.4828, lr_0 = 7.0038e-04
Loss = 2.3508e-01, PNorm = 40.0246, GNorm = 0.8111, lr_0 = 7.0413e-04
Loss = 2.7812e-01, PNorm = 40.0419, GNorm = 13.8570, lr_0 = 7.0788e-04
Loss = 2.8206e-01, PNorm = 40.0583, GNorm = 2.9039, lr_0 = 7.1163e-04
Loss = 2.7482e-01, PNorm = 40.0802, GNorm = 6.1250, lr_0 = 7.1538e-04
Loss = 2.7975e-01, PNorm = 40.1068, GNorm = 10.9630, lr_0 = 7.1913e-04
Loss = 2.2886e-01, PNorm = 40.1279, GNorm = 3.5884, lr_0 = 7.2288e-04
Loss = 2.9649e-01, PNorm = 40.1471, GNorm = 9.9993, lr_0 = 7.2663e-04
Loss = 3.3602e-01, PNorm = 40.1682, GNorm = 0.9693, lr_0 = 7.3038e-04
Loss = 2.8435e-01, PNorm = 40.1964, GNorm = 7.6042, lr_0 = 7.3413e-04
Loss = 2.5525e-01, PNorm = 40.2204, GNorm = 1.0521, lr_0 = 7.3788e-04
Loss = 2.3624e-01, PNorm = 40.2401, GNorm = 3.5074, lr_0 = 7.4163e-04
Loss = 2.6796e-01, PNorm = 40.2581, GNorm = 3.0646, lr_0 = 7.4538e-04
Loss = 2.5152e-01, PNorm = 40.2756, GNorm = 1.0736, lr_0 = 7.4913e-04
Loss = 2.8477e-01, PNorm = 40.2969, GNorm = 7.3925, lr_0 = 7.5288e-04
Loss = 3.2495e-01, PNorm = 40.3268, GNorm = 1.0227, lr_0 = 7.5663e-04
Loss = 2.7080e-01, PNorm = 40.3563, GNorm = 4.5199, lr_0 = 7.6038e-04
Loss = 2.4958e-01, PNorm = 40.3794, GNorm = 4.4931, lr_0 = 7.6413e-04
Loss = 2.1450e-01, PNorm = 40.4019, GNorm = 1.2027, lr_0 = 7.6788e-04
Loss = 2.4780e-01, PNorm = 40.4265, GNorm = 2.5890, lr_0 = 7.7163e-04
Loss = 2.4221e-01, PNorm = 40.4476, GNorm = 1.1456, lr_0 = 7.7538e-04
Loss = 2.1043e-01, PNorm = 40.4641, GNorm = 2.8953, lr_0 = 7.7913e-04
Loss = 2.1957e-01, PNorm = 40.4864, GNorm = 1.0872, lr_0 = 7.8288e-04
Loss = 2.1458e-01, PNorm = 40.5045, GNorm = 2.6700, lr_0 = 7.8663e-04
Loss = 3.0086e-01, PNorm = 40.5215, GNorm = 3.2789, lr_0 = 7.9038e-04
Loss = 2.0658e-01, PNorm = 40.5332, GNorm = 1.0176, lr_0 = 7.9413e-04
Loss = 2.6336e-01, PNorm = 40.5606, GNorm = 1.4530, lr_0 = 7.9788e-04
Loss = 2.5985e-01, PNorm = 40.5762, GNorm = 9.6504, lr_0 = 8.0163e-04
Loss = 2.8571e-01, PNorm = 40.6012, GNorm = 4.3450, lr_0 = 8.0538e-04
Loss = 2.7172e-01, PNorm = 40.6327, GNorm = 2.1252, lr_0 = 8.0913e-04
Loss = 2.4341e-01, PNorm = 40.6678, GNorm = 3.0050, lr_0 = 8.1288e-04
Loss = 2.4101e-01, PNorm = 40.6896, GNorm = 1.6115, lr_0 = 8.1663e-04
Loss = 2.1662e-01, PNorm = 40.7103, GNorm = 1.3047, lr_0 = 8.2038e-04
Loss = 2.3342e-01, PNorm = 40.7316, GNorm = 1.9754, lr_0 = 8.2413e-04
Loss = 2.2444e-01, PNorm = 40.7457, GNorm = 4.8450, lr_0 = 8.2788e-04
Loss = 2.1257e-01, PNorm = 40.7723, GNorm = 2.4769, lr_0 = 8.3163e-04
Loss = 1.9268e-01, PNorm = 40.7906, GNorm = 1.0862, lr_0 = 8.3538e-04
Loss = 2.4648e-01, PNorm = 40.8081, GNorm = 7.1903, lr_0 = 8.3913e-04
Loss = 2.4885e-01, PNorm = 40.8293, GNorm = 2.0797, lr_0 = 8.4288e-04
Loss = 2.7016e-01, PNorm = 40.8488, GNorm = 4.5492, lr_0 = 8.4663e-04
Loss = 2.3428e-01, PNorm = 40.8776, GNorm = 5.2592, lr_0 = 8.5038e-04
Loss = 2.5151e-01, PNorm = 40.9029, GNorm = 3.9153, lr_0 = 8.5413e-04
Loss = 2.1410e-01, PNorm = 40.9238, GNorm = 3.4985, lr_0 = 8.5788e-04
Loss = 2.3420e-01, PNorm = 40.9469, GNorm = 2.4666, lr_0 = 8.6163e-04
Loss = 2.6255e-01, PNorm = 40.9641, GNorm = 2.3070, lr_0 = 8.6538e-04
Loss = 2.4813e-01, PNorm = 41.0005, GNorm = 5.3043, lr_0 = 8.6913e-04
Loss = 2.1518e-01, PNorm = 41.0204, GNorm = 1.0590, lr_0 = 8.7288e-04
Loss = 2.1959e-01, PNorm = 41.0441, GNorm = 6.2186, lr_0 = 8.7663e-04
Loss = 1.8922e-01, PNorm = 41.0624, GNorm = 5.6156, lr_0 = 8.8038e-04
Loss = 2.4930e-01, PNorm = 41.0833, GNorm = 3.1646, lr_0 = 8.8413e-04
Loss = 2.2984e-01, PNorm = 41.1154, GNorm = 3.9863, lr_0 = 8.8788e-04
Loss = 1.8918e-01, PNorm = 41.1485, GNorm = 1.3234, lr_0 = 8.9163e-04
Loss = 2.0066e-01, PNorm = 41.1692, GNorm = 1.1498, lr_0 = 8.9538e-04
Loss = 2.1782e-01, PNorm = 41.1914, GNorm = 5.7363, lr_0 = 8.9913e-04
Loss = 2.4374e-01, PNorm = 41.2072, GNorm = 5.0458, lr_0 = 9.0288e-04
Loss = 2.7551e-01, PNorm = 41.2397, GNorm = 1.6903, lr_0 = 9.0663e-04
Loss = 2.3737e-01, PNorm = 41.2801, GNorm = 1.0593, lr_0 = 9.1038e-04
Loss = 2.5636e-01, PNorm = 41.3128, GNorm = 1.6977, lr_0 = 9.1413e-04
Loss = 2.2333e-01, PNorm = 41.3368, GNorm = 1.2230, lr_0 = 9.1788e-04
Loss = 2.2094e-01, PNorm = 41.3560, GNorm = 2.5519, lr_0 = 9.2163e-04
Loss = 2.4202e-01, PNorm = 41.3821, GNorm = 6.2465, lr_0 = 9.2538e-04
Loss = 2.0448e-01, PNorm = 41.4122, GNorm = 1.5025, lr_0 = 9.2913e-04
Loss = 2.3455e-01, PNorm = 41.4377, GNorm = 1.2873, lr_0 = 9.3288e-04
Loss = 2.3807e-01, PNorm = 41.4627, GNorm = 1.8769, lr_0 = 9.3663e-04
Loss = 2.3961e-01, PNorm = 41.4895, GNorm = 5.7036, lr_0 = 9.4038e-04
Loss = 2.2498e-01, PNorm = 41.5158, GNorm = 1.7868, lr_0 = 9.4413e-04
Loss = 2.3235e-01, PNorm = 41.5485, GNorm = 2.8649, lr_0 = 9.4788e-04
Loss = 2.2706e-01, PNorm = 41.5812, GNorm = 1.2184, lr_0 = 9.5163e-04
Loss = 2.4979e-01, PNorm = 41.6131, GNorm = 3.7563, lr_0 = 9.5538e-04
Loss = 2.5322e-01, PNorm = 41.6503, GNorm = 3.7349, lr_0 = 9.5913e-04
Loss = 2.1682e-01, PNorm = 41.6788, GNorm = 3.8739, lr_0 = 9.6288e-04
Loss = 2.0077e-01, PNorm = 41.7111, GNorm = 3.1009, lr_0 = 9.6663e-04
Loss = 1.9753e-01, PNorm = 41.7363, GNorm = 1.6734, lr_0 = 9.7038e-04
Loss = 1.9707e-01, PNorm = 41.7577, GNorm = 0.5899, lr_0 = 9.7413e-04
Loss = 2.1007e-01, PNorm = 41.7728, GNorm = 3.7739, lr_0 = 9.7788e-04
Loss = 1.9884e-01, PNorm = 41.7911, GNorm = 1.9162, lr_0 = 9.8163e-04
Loss = 1.9767e-01, PNorm = 41.8067, GNorm = 3.5524, lr_0 = 9.8537e-04
Loss = 2.1794e-01, PNorm = 41.8265, GNorm = 1.7584, lr_0 = 9.8912e-04
Loss = 2.2646e-01, PNorm = 41.8475, GNorm = 4.4930, lr_0 = 9.9288e-04
Loss = 1.8909e-01, PNorm = 41.8733, GNorm = 0.8813, lr_0 = 9.9663e-04
Loss = 2.2007e-01, PNorm = 41.8962, GNorm = 3.1603, lr_0 = 9.9993e-04
Validation mae = 0.610046
Epoch 2
Loss = 2.2044e-01, PNorm = 41.9186, GNorm = 3.4384, lr_0 = 9.9925e-04
Loss = 2.0970e-01, PNorm = 41.9476, GNorm = 4.4807, lr_0 = 9.9856e-04
Loss = 2.2284e-01, PNorm = 41.9711, GNorm = 0.7978, lr_0 = 9.9788e-04
Loss = 2.0392e-01, PNorm = 41.9961, GNorm = 2.1690, lr_0 = 9.9719e-04
Loss = 2.2562e-01, PNorm = 42.0184, GNorm = 0.9993, lr_0 = 9.9651e-04
Loss = 2.3660e-01, PNorm = 42.0553, GNorm = 1.8706, lr_0 = 9.9583e-04
Loss = 2.3681e-01, PNorm = 42.0988, GNorm = 4.8198, lr_0 = 9.9515e-04
Loss = 2.0222e-01, PNorm = 42.1295, GNorm = 2.4165, lr_0 = 9.9446e-04
Loss = 2.0362e-01, PNorm = 42.1567, GNorm = 1.4015, lr_0 = 9.9378e-04
Loss = 2.1671e-01, PNorm = 42.1841, GNorm = 4.1732, lr_0 = 9.9310e-04
Loss = 2.0039e-01, PNorm = 42.2122, GNorm = 1.8216, lr_0 = 9.9242e-04
Loss = 2.1611e-01, PNorm = 42.2316, GNorm = 4.0705, lr_0 = 9.9174e-04
Loss = 2.0548e-01, PNorm = 42.2592, GNorm = 2.7881, lr_0 = 9.9106e-04
Loss = 2.0896e-01, PNorm = 42.2911, GNorm = 6.0918, lr_0 = 9.9038e-04
Loss = 2.0333e-01, PNorm = 42.3197, GNorm = 2.0439, lr_0 = 9.8971e-04
Loss = 2.0461e-01, PNorm = 42.3445, GNorm = 1.3559, lr_0 = 9.8903e-04
Loss = 2.6755e-01, PNorm = 42.3769, GNorm = 6.7609, lr_0 = 9.8835e-04
Loss = 2.4146e-01, PNorm = 42.4110, GNorm = 2.9089, lr_0 = 9.8767e-04
Loss = 1.9922e-01, PNorm = 42.4463, GNorm = 2.1868, lr_0 = 9.8700e-04
Loss = 1.8975e-01, PNorm = 42.4764, GNorm = 0.9370, lr_0 = 9.8632e-04
Loss = 2.1118e-01, PNorm = 42.5071, GNorm = 3.3411, lr_0 = 9.8564e-04
Loss = 1.8564e-01, PNorm = 42.5308, GNorm = 1.7536, lr_0 = 9.8497e-04
Loss = 1.9266e-01, PNorm = 42.5628, GNorm = 1.0821, lr_0 = 9.8429e-04
Loss = 1.9501e-01, PNorm = 42.5976, GNorm = 1.2212, lr_0 = 9.8362e-04
Loss = 2.1237e-01, PNorm = 42.6156, GNorm = 2.0071, lr_0 = 9.8295e-04
Loss = 2.0416e-01, PNorm = 42.6355, GNorm = 1.4954, lr_0 = 9.8227e-04
Loss = 2.5877e-01, PNorm = 42.6638, GNorm = 2.2592, lr_0 = 9.8160e-04
Loss = 2.0215e-01, PNorm = 42.7015, GNorm = 4.0593, lr_0 = 9.8093e-04
Loss = 2.2469e-01, PNorm = 42.7352, GNorm = 1.2601, lr_0 = 9.8026e-04
Loss = 2.1751e-01, PNorm = 42.7721, GNorm = 1.0923, lr_0 = 9.7958e-04
Loss = 1.9873e-01, PNorm = 42.7988, GNorm = 2.8271, lr_0 = 9.7891e-04
Loss = 2.5205e-01, PNorm = 42.8204, GNorm = 3.4023, lr_0 = 9.7824e-04
Loss = 1.9539e-01, PNorm = 42.8456, GNorm = 3.1975, lr_0 = 9.7757e-04
Loss = 1.9733e-01, PNorm = 42.8702, GNorm = 2.2145, lr_0 = 9.7690e-04
Loss = 2.2738e-01, PNorm = 42.8988, GNorm = 1.9927, lr_0 = 9.7623e-04
Loss = 2.1949e-01, PNorm = 42.9259, GNorm = 2.9707, lr_0 = 9.7556e-04
Loss = 2.2227e-01, PNorm = 42.9493, GNorm = 3.5242, lr_0 = 9.7490e-04
Loss = 2.0048e-01, PNorm = 42.9755, GNorm = 3.6012, lr_0 = 9.7423e-04
Loss = 2.3736e-01, PNorm = 43.0029, GNorm = 3.3169, lr_0 = 9.7356e-04
Loss = 2.2795e-01, PNorm = 43.0417, GNorm = 4.2593, lr_0 = 9.7289e-04
Loss = 2.2505e-01, PNorm = 43.0751, GNorm = 2.1791, lr_0 = 9.7223e-04
Loss = 2.1104e-01, PNorm = 43.1126, GNorm = 1.2274, lr_0 = 9.7156e-04
Loss = 2.5277e-01, PNorm = 43.1455, GNorm = 2.2431, lr_0 = 9.7090e-04
Loss = 2.1290e-01, PNorm = 43.1737, GNorm = 3.1413, lr_0 = 9.7023e-04
Loss = 2.1409e-01, PNorm = 43.2043, GNorm = 2.3619, lr_0 = 9.6957e-04
Loss = 1.6664e-01, PNorm = 43.2297, GNorm = 0.8053, lr_0 = 9.6890e-04
Loss = 1.7891e-01, PNorm = 43.2530, GNorm = 2.0342, lr_0 = 9.6824e-04
Loss = 1.9808e-01, PNorm = 43.2709, GNorm = 3.0371, lr_0 = 9.6757e-04
Loss = 2.0008e-01, PNorm = 43.2945, GNorm = 1.9482, lr_0 = 9.6691e-04
Loss = 2.0557e-01, PNorm = 43.3201, GNorm = 3.1808, lr_0 = 9.6625e-04
Loss = 2.0983e-01, PNorm = 43.3440, GNorm = 3.7120, lr_0 = 9.6559e-04
Loss = 1.8375e-01, PNorm = 43.3777, GNorm = 2.5282, lr_0 = 9.6493e-04
Loss = 2.0628e-01, PNorm = 43.3970, GNorm = 2.2545, lr_0 = 9.6427e-04
Loss = 1.7894e-01, PNorm = 43.4236, GNorm = 1.1419, lr_0 = 9.6360e-04
Loss = 1.8048e-01, PNorm = 43.4525, GNorm = 1.1985, lr_0 = 9.6294e-04
Loss = 1.9717e-01, PNorm = 43.4793, GNorm = 1.5327, lr_0 = 9.6228e-04
Loss = 2.0681e-01, PNorm = 43.5066, GNorm = 2.3020, lr_0 = 9.6163e-04
Loss = 2.4113e-01, PNorm = 43.5369, GNorm = 3.5467, lr_0 = 9.6097e-04
Loss = 2.0427e-01, PNorm = 43.5650, GNorm = 1.9422, lr_0 = 9.6031e-04
Loss = 2.1498e-01, PNorm = 43.5973, GNorm = 2.4936, lr_0 = 9.5965e-04
Loss = 1.7394e-01, PNorm = 43.6141, GNorm = 0.8458, lr_0 = 9.5899e-04
Loss = 1.7219e-01, PNorm = 43.6305, GNorm = 3.0901, lr_0 = 9.5834e-04
Loss = 1.9082e-01, PNorm = 43.6520, GNorm = 1.7576, lr_0 = 9.5768e-04
Loss = 1.9455e-01, PNorm = 43.6767, GNorm = 3.1507, lr_0 = 9.5702e-04
Loss = 1.8332e-01, PNorm = 43.6976, GNorm = 1.1243, lr_0 = 9.5637e-04
Loss = 1.8330e-01, PNorm = 43.7134, GNorm = 4.6016, lr_0 = 9.5571e-04
Loss = 2.2039e-01, PNorm = 43.7326, GNorm = 2.3518, lr_0 = 9.5506e-04
Loss = 2.5247e-01, PNorm = 43.7614, GNorm = 5.3396, lr_0 = 9.5440e-04
Loss = 2.2085e-01, PNorm = 43.7990, GNorm = 1.6001, lr_0 = 9.5375e-04
Loss = 2.5846e-01, PNorm = 43.8348, GNorm = 2.5414, lr_0 = 9.5310e-04
Loss = 1.8639e-01, PNorm = 43.8662, GNorm = 1.2497, lr_0 = 9.5244e-04
Loss = 2.2270e-01, PNorm = 43.8836, GNorm = 1.1772, lr_0 = 9.5179e-04
Loss = 1.9040e-01, PNorm = 43.9080, GNorm = 0.9386, lr_0 = 9.5114e-04
Loss = 1.7986e-01, PNorm = 43.9381, GNorm = 1.2273, lr_0 = 9.5049e-04
Loss = 2.1917e-01, PNorm = 43.9687, GNorm = 4.0820, lr_0 = 9.4984e-04
Loss = 2.0186e-01, PNorm = 43.9889, GNorm = 1.0280, lr_0 = 9.4919e-04
Loss = 1.9563e-01, PNorm = 44.0151, GNorm = 2.5278, lr_0 = 9.4854e-04
Loss = 1.8891e-01, PNorm = 44.0434, GNorm = 1.9787, lr_0 = 9.4789e-04
Loss = 2.0831e-01, PNorm = 44.0669, GNorm = 0.9294, lr_0 = 9.4724e-04
Loss = 2.1341e-01, PNorm = 44.0918, GNorm = 2.9163, lr_0 = 9.4659e-04
Loss = 2.0481e-01, PNorm = 44.1147, GNorm = 4.9885, lr_0 = 9.4594e-04
Loss = 1.8562e-01, PNorm = 44.1383, GNorm = 1.4590, lr_0 = 9.4529e-04
Loss = 1.9880e-01, PNorm = 44.1623, GNorm = 2.7906, lr_0 = 9.4464e-04
Loss = 2.0081e-01, PNorm = 44.1933, GNorm = 2.0724, lr_0 = 9.4400e-04
Loss = 1.7418e-01, PNorm = 44.2228, GNorm = 4.3648, lr_0 = 9.4335e-04
Loss = 2.1035e-01, PNorm = 44.2511, GNorm = 3.0484, lr_0 = 9.4270e-04
Loss = 2.0338e-01, PNorm = 44.2843, GNorm = 1.8338, lr_0 = 9.4206e-04
Loss = 2.2015e-01, PNorm = 44.3151, GNorm = 3.6117, lr_0 = 9.4141e-04
Loss = 1.7272e-01, PNorm = 44.3418, GNorm = 0.6889, lr_0 = 9.4077e-04
Loss = 1.7849e-01, PNorm = 44.3616, GNorm = 1.0871, lr_0 = 9.4012e-04
Loss = 2.0018e-01, PNorm = 44.3793, GNorm = 1.9391, lr_0 = 9.3948e-04
Loss = 1.8087e-01, PNorm = 44.4061, GNorm = 0.6221, lr_0 = 9.3884e-04
Loss = 1.7153e-01, PNorm = 44.4199, GNorm = 1.2026, lr_0 = 9.3819e-04
Loss = 1.9076e-01, PNorm = 44.4366, GNorm = 0.9189, lr_0 = 9.3755e-04
Loss = 1.8789e-01, PNorm = 44.4587, GNorm = 1.3730, lr_0 = 9.3691e-04
Loss = 1.8592e-01, PNorm = 44.4811, GNorm = 2.9228, lr_0 = 9.3627e-04
Loss = 1.7890e-01, PNorm = 44.5074, GNorm = 1.6323, lr_0 = 9.3562e-04
Loss = 1.7183e-01, PNorm = 44.5331, GNorm = 1.4536, lr_0 = 9.3498e-04
Loss = 1.9487e-01, PNorm = 44.5620, GNorm = 2.0963, lr_0 = 9.3434e-04
Loss = 2.0098e-01, PNorm = 44.5946, GNorm = 1.1791, lr_0 = 9.3370e-04
Loss = 2.0312e-01, PNorm = 44.6164, GNorm = 2.0222, lr_0 = 9.3306e-04
Loss = 1.8206e-01, PNorm = 44.6329, GNorm = 1.0435, lr_0 = 9.3242e-04
Loss = 1.8259e-01, PNorm = 44.6608, GNorm = 2.0985, lr_0 = 9.3178e-04
Loss = 1.8715e-01, PNorm = 44.6824, GNorm = 5.6551, lr_0 = 9.3115e-04
Loss = 1.8972e-01, PNorm = 44.7098, GNorm = 1.2967, lr_0 = 9.3051e-04
Loss = 1.9781e-01, PNorm = 44.7360, GNorm = 1.4808, lr_0 = 9.2987e-04
Loss = 1.7850e-01, PNorm = 44.7541, GNorm = 1.6467, lr_0 = 9.2923e-04
Loss = 1.7365e-01, PNorm = 44.7676, GNorm = 0.8296, lr_0 = 9.2860e-04
Loss = 1.9432e-01, PNorm = 44.7867, GNorm = 4.5866, lr_0 = 9.2796e-04
Loss = 1.8618e-01, PNorm = 44.8101, GNorm = 0.9596, lr_0 = 9.2733e-04
Loss = 1.7351e-01, PNorm = 44.8286, GNorm = 1.0396, lr_0 = 9.2669e-04
Loss = 2.0595e-01, PNorm = 44.8514, GNorm = 1.3928, lr_0 = 9.2606e-04
Loss = 2.0893e-01, PNorm = 44.8891, GNorm = 3.1682, lr_0 = 9.2542e-04
Loss = 2.0612e-01, PNorm = 44.9150, GNorm = 2.4859, lr_0 = 9.2479e-04
Loss = 2.1836e-01, PNorm = 44.9438, GNorm = 1.6932, lr_0 = 9.2415e-04
Loss = 1.7963e-01, PNorm = 44.9735, GNorm = 1.3350, lr_0 = 9.2352e-04
Loss = 1.8238e-01, PNorm = 44.9941, GNorm = 0.7019, lr_0 = 9.2289e-04
Loss = 1.6198e-01, PNorm = 45.0009, GNorm = 1.0728, lr_0 = 9.2226e-04
Loss = 1.6328e-01, PNorm = 45.0159, GNorm = 0.6467, lr_0 = 9.2162e-04
Loss = 1.6737e-01, PNorm = 45.0291, GNorm = 1.9813, lr_0 = 9.2099e-04
Validation mae = 0.487002
Epoch 3
Loss = 1.6022e-01, PNorm = 45.0515, GNorm = 1.1864, lr_0 = 9.2036e-04
Loss = 1.8086e-01, PNorm = 45.0749, GNorm = 4.3897, lr_0 = 9.1973e-04
Loss = 1.7979e-01, PNorm = 45.0973, GNorm = 2.8968, lr_0 = 9.1910e-04
Loss = 1.7251e-01, PNorm = 45.1084, GNorm = 0.7415, lr_0 = 9.1847e-04
Loss = 1.5576e-01, PNorm = 45.1307, GNorm = 1.2798, lr_0 = 9.1784e-04
Loss = 1.6937e-01, PNorm = 45.1551, GNorm = 1.3518, lr_0 = 9.1721e-04
Loss = 1.7418e-01, PNorm = 45.1859, GNorm = 1.8240, lr_0 = 9.1658e-04
Loss = 1.9147e-01, PNorm = 45.2140, GNorm = 3.7411, lr_0 = 9.1596e-04
Loss = 2.0157e-01, PNorm = 45.2318, GNorm = 2.8296, lr_0 = 9.1533e-04
Loss = 2.0446e-01, PNorm = 45.2564, GNorm = 4.9709, lr_0 = 9.1470e-04
Loss = 1.7368e-01, PNorm = 45.2856, GNorm = 1.1632, lr_0 = 9.1408e-04
Loss = 1.5908e-01, PNorm = 45.3180, GNorm = 0.6004, lr_0 = 9.1345e-04
Loss = 1.5476e-01, PNorm = 45.3412, GNorm = 1.0123, lr_0 = 9.1282e-04
Loss = 1.7652e-01, PNorm = 45.3554, GNorm = 1.9914, lr_0 = 9.1220e-04
Loss = 1.9621e-01, PNorm = 45.3748, GNorm = 0.9969, lr_0 = 9.1157e-04
Loss = 2.0905e-01, PNorm = 45.4069, GNorm = 1.6227, lr_0 = 9.1095e-04
Loss = 1.7258e-01, PNorm = 45.4431, GNorm = 0.9562, lr_0 = 9.1032e-04
Loss = 1.6003e-01, PNorm = 45.4781, GNorm = 1.4507, lr_0 = 9.0970e-04
Loss = 1.8923e-01, PNorm = 45.5055, GNorm = 4.0308, lr_0 = 9.0908e-04
Loss = 1.9646e-01, PNorm = 45.5248, GNorm = 1.0439, lr_0 = 9.0846e-04
Loss = 1.5076e-01, PNorm = 45.5447, GNorm = 0.8046, lr_0 = 9.0783e-04
Loss = 1.5501e-01, PNorm = 45.5657, GNorm = 3.3457, lr_0 = 9.0721e-04
Loss = 1.7060e-01, PNorm = 45.5902, GNorm = 0.9132, lr_0 = 9.0659e-04
Loss = 1.6219e-01, PNorm = 45.6125, GNorm = 1.0141, lr_0 = 9.0597e-04
Loss = 1.9014e-01, PNorm = 45.6372, GNorm = 2.1980, lr_0 = 9.0535e-04
Loss = 1.5908e-01, PNorm = 45.6613, GNorm = 0.9968, lr_0 = 9.0473e-04
Loss = 1.9456e-01, PNorm = 45.6905, GNorm = 1.2460, lr_0 = 9.0411e-04
Loss = 1.6894e-01, PNorm = 45.7190, GNorm = 0.8939, lr_0 = 9.0349e-04
Loss = 1.5118e-01, PNorm = 45.7488, GNorm = 1.2204, lr_0 = 9.0287e-04
Loss = 1.5364e-01, PNorm = 45.7639, GNorm = 0.9354, lr_0 = 9.0225e-04
Loss = 1.6432e-01, PNorm = 45.7892, GNorm = 1.1238, lr_0 = 9.0163e-04
Loss = 1.7781e-01, PNorm = 45.8058, GNorm = 2.5932, lr_0 = 9.0102e-04
Loss = 1.9568e-01, PNorm = 45.8342, GNorm = 1.6595, lr_0 = 9.0040e-04
Loss = 1.7218e-01, PNorm = 45.8629, GNorm = 3.2183, lr_0 = 8.9978e-04
Loss = 1.8021e-01, PNorm = 45.8915, GNorm = 0.9501, lr_0 = 8.9916e-04
Loss = 1.5055e-01, PNorm = 45.9113, GNorm = 0.8798, lr_0 = 8.9855e-04
Loss = 1.4631e-01, PNorm = 45.9311, GNorm = 0.8688, lr_0 = 8.9793e-04
Loss = 1.5467e-01, PNorm = 45.9498, GNorm = 2.2012, lr_0 = 8.9732e-04
Loss = 1.4360e-01, PNorm = 45.9744, GNorm = 3.0640, lr_0 = 8.9670e-04
Loss = 1.9736e-01, PNorm = 46.0036, GNorm = 1.8147, lr_0 = 8.9609e-04
Loss = 1.6123e-01, PNorm = 46.0356, GNorm = 0.8630, lr_0 = 8.9548e-04
Loss = 1.7197e-01, PNorm = 46.0632, GNorm = 2.2125, lr_0 = 8.9486e-04
Loss = 1.6228e-01, PNorm = 46.0786, GNorm = 1.2107, lr_0 = 8.9425e-04
Loss = 1.8042e-01, PNorm = 46.0999, GNorm = 1.1462, lr_0 = 8.9364e-04
Loss = 1.5223e-01, PNorm = 46.1146, GNorm = 1.3076, lr_0 = 8.9302e-04
Loss = 1.7350e-01, PNorm = 46.1311, GNorm = 3.4547, lr_0 = 8.9241e-04
Loss = 1.6345e-01, PNorm = 46.1566, GNorm = 2.3278, lr_0 = 8.9180e-04
Loss = 1.5749e-01, PNorm = 46.1794, GNorm = 2.0521, lr_0 = 8.9119e-04
Loss = 1.5479e-01, PNorm = 46.2089, GNorm = 1.2465, lr_0 = 8.9058e-04
Loss = 1.5170e-01, PNorm = 46.2296, GNorm = 0.9282, lr_0 = 8.8997e-04
Loss = 1.6552e-01, PNorm = 46.2482, GNorm = 2.1027, lr_0 = 8.8936e-04
Loss = 1.7271e-01, PNorm = 46.2623, GNorm = 0.5552, lr_0 = 8.8875e-04
Loss = 1.6800e-01, PNorm = 46.2831, GNorm = 1.5736, lr_0 = 8.8814e-04
Loss = 2.0685e-01, PNorm = 46.2984, GNorm = 2.1944, lr_0 = 8.8753e-04
Loss = 1.8051e-01, PNorm = 46.3317, GNorm = 0.6717, lr_0 = 8.8693e-04
Loss = 1.8123e-01, PNorm = 46.3589, GNorm = 1.9120, lr_0 = 8.8632e-04
Loss = 2.0337e-01, PNorm = 46.3812, GNorm = 1.9509, lr_0 = 8.8571e-04
Loss = 2.0925e-01, PNorm = 46.4192, GNorm = 1.1402, lr_0 = 8.8510e-04
Loss = 1.4601e-01, PNorm = 46.4552, GNorm = 1.1502, lr_0 = 8.8450e-04
Loss = 1.6932e-01, PNorm = 46.4787, GNorm = 1.4809, lr_0 = 8.8389e-04
Loss = 1.7717e-01, PNorm = 46.5075, GNorm = 1.9536, lr_0 = 8.8329e-04
Loss = 1.6701e-01, PNorm = 46.5366, GNorm = 1.5354, lr_0 = 8.8268e-04
Loss = 1.6633e-01, PNorm = 46.5676, GNorm = 2.0197, lr_0 = 8.8208e-04
Loss = 1.9809e-01, PNorm = 46.5957, GNorm = 2.7288, lr_0 = 8.8147e-04
Loss = 1.6775e-01, PNorm = 46.6222, GNorm = 3.1504, lr_0 = 8.8087e-04
Loss = 1.6256e-01, PNorm = 46.6532, GNorm = 0.7278, lr_0 = 8.8026e-04
Loss = 1.7145e-01, PNorm = 46.6825, GNorm = 0.8658, lr_0 = 8.7966e-04
Loss = 1.6841e-01, PNorm = 46.7156, GNorm = 3.3357, lr_0 = 8.7906e-04
Loss = 1.6960e-01, PNorm = 46.7317, GNorm = 1.2278, lr_0 = 8.7846e-04
Loss = 2.0094e-01, PNorm = 46.7455, GNorm = 2.1144, lr_0 = 8.7785e-04
Loss = 1.7320e-01, PNorm = 46.7578, GNorm = 2.7503, lr_0 = 8.7725e-04
Loss = 1.7618e-01, PNorm = 46.7818, GNorm = 3.3029, lr_0 = 8.7665e-04
Loss = 1.8511e-01, PNorm = 46.8105, GNorm = 1.0657, lr_0 = 8.7605e-04
Loss = 1.8669e-01, PNorm = 46.8365, GNorm = 3.0182, lr_0 = 8.7545e-04
Loss = 1.7162e-01, PNorm = 46.8634, GNorm = 1.8888, lr_0 = 8.7485e-04
Loss = 1.6415e-01, PNorm = 46.8907, GNorm = 1.1696, lr_0 = 8.7425e-04
Loss = 1.8915e-01, PNorm = 46.9053, GNorm = 1.4629, lr_0 = 8.7365e-04
Loss = 1.7303e-01, PNorm = 46.9165, GNorm = 2.3122, lr_0 = 8.7306e-04
Loss = 1.9000e-01, PNorm = 46.9360, GNorm = 4.7520, lr_0 = 8.7246e-04
Loss = 1.6529e-01, PNorm = 46.9695, GNorm = 1.0268, lr_0 = 8.7186e-04
Loss = 1.4924e-01, PNorm = 46.9935, GNorm = 1.8860, lr_0 = 8.7126e-04
Loss = 1.9518e-01, PNorm = 47.0128, GNorm = 3.0076, lr_0 = 8.7067e-04
Loss = 1.7285e-01, PNorm = 47.0391, GNorm = 2.5219, lr_0 = 8.7007e-04
Loss = 1.7884e-01, PNorm = 47.0599, GNorm = 3.1891, lr_0 = 8.6947e-04
Loss = 1.8145e-01, PNorm = 47.0892, GNorm = 1.5721, lr_0 = 8.6888e-04
Loss = 1.7315e-01, PNorm = 47.1134, GNorm = 1.5641, lr_0 = 8.6828e-04
Loss = 2.0661e-01, PNorm = 47.1383, GNorm = 1.8469, lr_0 = 8.6769e-04
Loss = 1.9091e-01, PNorm = 47.1772, GNorm = 2.2077, lr_0 = 8.6709e-04
Loss = 1.5381e-01, PNorm = 47.2092, GNorm = 0.8773, lr_0 = 8.6650e-04
Loss = 1.6732e-01, PNorm = 47.2318, GNorm = 2.3521, lr_0 = 8.6590e-04
Loss = 1.7326e-01, PNorm = 47.2456, GNorm = 0.8054, lr_0 = 8.6531e-04
Loss = 1.7843e-01, PNorm = 47.2723, GNorm = 3.0567, lr_0 = 8.6472e-04
Loss = 2.0118e-01, PNorm = 47.2959, GNorm = 1.7129, lr_0 = 8.6413e-04
Loss = 1.6983e-01, PNorm = 47.3226, GNorm = 1.2309, lr_0 = 8.6353e-04
Loss = 1.6454e-01, PNorm = 47.3504, GNorm = 2.4007, lr_0 = 8.6294e-04
Loss = 1.8296e-01, PNorm = 47.3817, GNorm = 1.9449, lr_0 = 8.6235e-04
Loss = 1.7007e-01, PNorm = 47.4051, GNorm = 0.8034, lr_0 = 8.6176e-04
Loss = 2.0002e-01, PNorm = 47.4228, GNorm = 0.9785, lr_0 = 8.6117e-04
Loss = 1.6236e-01, PNorm = 47.4462, GNorm = 2.2415, lr_0 = 8.6058e-04
Loss = 1.5786e-01, PNorm = 47.4677, GNorm = 1.3650, lr_0 = 8.5999e-04
Loss = 1.9543e-01, PNorm = 47.4897, GNorm = 1.5414, lr_0 = 8.5940e-04
Loss = 1.4273e-01, PNorm = 47.5072, GNorm = 0.9152, lr_0 = 8.5881e-04
Loss = 1.7769e-01, PNorm = 47.5307, GNorm = 1.4883, lr_0 = 8.5823e-04
Loss = 1.4830e-01, PNorm = 47.5536, GNorm = 2.8673, lr_0 = 8.5764e-04
Loss = 2.1348e-01, PNorm = 47.5744, GNorm = 5.3599, lr_0 = 8.5705e-04
Loss = 2.0392e-01, PNorm = 47.6077, GNorm = 1.2647, lr_0 = 8.5646e-04
Loss = 1.8069e-01, PNorm = 47.6462, GNorm = 1.2269, lr_0 = 8.5588e-04
Loss = 1.8052e-01, PNorm = 47.6682, GNorm = 1.1161, lr_0 = 8.5529e-04
Loss = 1.5921e-01, PNorm = 47.6795, GNorm = 0.4881, lr_0 = 8.5470e-04
Loss = 1.4885e-01, PNorm = 47.6964, GNorm = 0.9082, lr_0 = 8.5412e-04
Loss = 1.4725e-01, PNorm = 47.7196, GNorm = 1.7565, lr_0 = 8.5353e-04
Loss = 1.6882e-01, PNorm = 47.7299, GNorm = 1.5330, lr_0 = 8.5295e-04
Loss = 1.6471e-01, PNorm = 47.7555, GNorm = 1.2226, lr_0 = 8.5236e-04
Loss = 1.6825e-01, PNorm = 47.7764, GNorm = 2.1874, lr_0 = 8.5178e-04
Loss = 1.4128e-01, PNorm = 47.7957, GNorm = 0.6618, lr_0 = 8.5120e-04
Loss = 1.4864e-01, PNorm = 47.8125, GNorm = 1.3061, lr_0 = 8.5061e-04
Loss = 1.6451e-01, PNorm = 47.8255, GNorm = 1.3533, lr_0 = 8.5003e-04
Loss = 1.7608e-01, PNorm = 47.8440, GNorm = 2.2681, lr_0 = 8.4945e-04
Loss = 1.5725e-01, PNorm = 47.8632, GNorm = 0.9886, lr_0 = 8.4887e-04
Loss = 1.4725e-01, PNorm = 47.8802, GNorm = 2.7294, lr_0 = 8.4828e-04
Validation mae = 0.448405
Epoch 4
Loss = 1.5020e-01, PNorm = 47.9001, GNorm = 1.0491, lr_0 = 8.4770e-04
Loss = 1.4344e-01, PNorm = 47.9157, GNorm = 1.2498, lr_0 = 8.4712e-04
Loss = 1.6285e-01, PNorm = 47.9333, GNorm = 1.3504, lr_0 = 8.4654e-04
Loss = 1.4941e-01, PNorm = 47.9508, GNorm = 0.7504, lr_0 = 8.4596e-04
Loss = 1.2802e-01, PNorm = 47.9666, GNorm = 0.7230, lr_0 = 8.4538e-04
Loss = 1.3960e-01, PNorm = 47.9821, GNorm = 1.5294, lr_0 = 8.4480e-04
Loss = 1.3268e-01, PNorm = 47.9986, GNorm = 0.6918, lr_0 = 8.4423e-04
Loss = 1.4887e-01, PNorm = 48.0090, GNorm = 0.8801, lr_0 = 8.4365e-04
Loss = 1.4390e-01, PNorm = 48.0265, GNorm = 0.7519, lr_0 = 8.4307e-04
Loss = 1.6904e-01, PNorm = 48.0498, GNorm = 1.8469, lr_0 = 8.4249e-04
Loss = 1.4885e-01, PNorm = 48.0790, GNorm = 2.4451, lr_0 = 8.4191e-04
Loss = 1.5202e-01, PNorm = 48.1020, GNorm = 0.8191, lr_0 = 8.4134e-04
Loss = 1.6098e-01, PNorm = 48.1249, GNorm = 2.3940, lr_0 = 8.4076e-04
Loss = 1.3770e-01, PNorm = 48.1514, GNorm = 0.4852, lr_0 = 8.4019e-04
Loss = 1.4648e-01, PNorm = 48.1734, GNorm = 0.7871, lr_0 = 8.3961e-04
Loss = 1.6052e-01, PNorm = 48.1865, GNorm = 1.3563, lr_0 = 8.3903e-04
Loss = 1.7444e-01, PNorm = 48.2021, GNorm = 1.0745, lr_0 = 8.3846e-04
Loss = 1.7723e-01, PNorm = 48.2255, GNorm = 1.2662, lr_0 = 8.3789e-04
Loss = 1.4967e-01, PNorm = 48.2570, GNorm = 1.0253, lr_0 = 8.3731e-04
Loss = 1.4811e-01, PNorm = 48.2741, GNorm = 0.7149, lr_0 = 8.3674e-04
Loss = 1.6041e-01, PNorm = 48.2928, GNorm = 0.9552, lr_0 = 8.3616e-04
Loss = 1.5572e-01, PNorm = 48.3024, GNorm = 0.8289, lr_0 = 8.3559e-04
Loss = 1.4093e-01, PNorm = 48.3162, GNorm = 1.0798, lr_0 = 8.3502e-04
Loss = 1.5423e-01, PNorm = 48.3345, GNorm = 0.8284, lr_0 = 8.3445e-04
Loss = 1.6363e-01, PNorm = 48.3513, GNorm = 1.4112, lr_0 = 8.3388e-04
Loss = 1.5491e-01, PNorm = 48.3752, GNorm = 1.4417, lr_0 = 8.3330e-04
Loss = 1.4550e-01, PNorm = 48.4005, GNorm = 0.6333, lr_0 = 8.3273e-04
Loss = 1.5780e-01, PNorm = 48.4220, GNorm = 2.0074, lr_0 = 8.3216e-04
Loss = 1.6226e-01, PNorm = 48.4443, GNorm = 0.6609, lr_0 = 8.3159e-04
Loss = 1.7016e-01, PNorm = 48.4673, GNorm = 1.6444, lr_0 = 8.3102e-04
Loss = 1.6681e-01, PNorm = 48.4874, GNorm = 0.9021, lr_0 = 8.3045e-04
Loss = 1.4722e-01, PNorm = 48.5136, GNorm = 1.3917, lr_0 = 8.2988e-04
Loss = 1.7936e-01, PNorm = 48.5376, GNorm = 1.2837, lr_0 = 8.2932e-04
Loss = 1.6269e-01, PNorm = 48.5687, GNorm = 3.9928, lr_0 = 8.2875e-04
Loss = 1.5256e-01, PNorm = 48.5919, GNorm = 1.6911, lr_0 = 8.2818e-04
Loss = 1.3855e-01, PNorm = 48.6068, GNorm = 1.5051, lr_0 = 8.2761e-04
Loss = 1.6215e-01, PNorm = 48.6155, GNorm = 0.8994, lr_0 = 8.2705e-04
Loss = 1.4139e-01, PNorm = 48.6424, GNorm = 0.6980, lr_0 = 8.2648e-04
Loss = 1.2904e-01, PNorm = 48.6636, GNorm = 0.6219, lr_0 = 8.2591e-04
Loss = 1.6046e-01, PNorm = 48.6800, GNorm = 1.2497, lr_0 = 8.2535e-04
Loss = 1.6175e-01, PNorm = 48.7029, GNorm = 0.9401, lr_0 = 8.2478e-04
Loss = 1.7435e-01, PNorm = 48.7362, GNorm = 0.8850, lr_0 = 8.2422e-04
Loss = 1.2810e-01, PNorm = 48.7678, GNorm = 1.4264, lr_0 = 8.2365e-04
Loss = 1.4053e-01, PNorm = 48.7851, GNorm = 1.0908, lr_0 = 8.2309e-04
Loss = 1.5366e-01, PNorm = 48.7973, GNorm = 1.0920, lr_0 = 8.2252e-04
Loss = 1.7336e-01, PNorm = 48.8245, GNorm = 1.0622, lr_0 = 8.2196e-04
Loss = 1.2863e-01, PNorm = 48.8513, GNorm = 0.9770, lr_0 = 8.2140e-04
Loss = 1.5496e-01, PNorm = 48.8743, GNorm = 1.1709, lr_0 = 8.2084e-04
Loss = 1.5041e-01, PNorm = 48.9026, GNorm = 1.8684, lr_0 = 8.2027e-04
Loss = 1.6410e-01, PNorm = 48.9280, GNorm = 1.1295, lr_0 = 8.1971e-04
Loss = 1.6953e-01, PNorm = 48.9516, GNorm = 1.3866, lr_0 = 8.1915e-04
Loss = 1.4851e-01, PNorm = 48.9717, GNorm = 1.9897, lr_0 = 8.1859e-04
Loss = 1.2523e-01, PNorm = 48.9999, GNorm = 0.8634, lr_0 = 8.1803e-04
Loss = 1.6183e-01, PNorm = 49.0230, GNorm = 1.0627, lr_0 = 8.1747e-04
Loss = 1.3200e-01, PNorm = 49.0399, GNorm = 1.7180, lr_0 = 8.1691e-04
Loss = 1.6731e-01, PNorm = 49.0581, GNorm = 3.0810, lr_0 = 8.1635e-04
Loss = 1.3182e-01, PNorm = 49.0736, GNorm = 1.1321, lr_0 = 8.1579e-04
Loss = 1.3456e-01, PNorm = 49.0855, GNorm = 1.2354, lr_0 = 8.1523e-04
Loss = 1.3444e-01, PNorm = 49.1066, GNorm = 0.8410, lr_0 = 8.1467e-04
Loss = 1.2826e-01, PNorm = 49.1273, GNorm = 0.9899, lr_0 = 8.1411e-04
Loss = 1.3214e-01, PNorm = 49.1447, GNorm = 0.7133, lr_0 = 8.1355e-04
Loss = 1.5488e-01, PNorm = 49.1673, GNorm = 2.0164, lr_0 = 8.1300e-04
Loss = 1.3304e-01, PNorm = 49.1815, GNorm = 0.8049, lr_0 = 8.1244e-04
Loss = 1.6709e-01, PNorm = 49.2094, GNorm = 0.7910, lr_0 = 8.1188e-04
Loss = 1.5040e-01, PNorm = 49.2361, GNorm = 1.1761, lr_0 = 8.1133e-04
Loss = 1.4727e-01, PNorm = 49.2623, GNorm = 1.1927, lr_0 = 8.1077e-04
Loss = 1.4816e-01, PNorm = 49.2892, GNorm = 1.9404, lr_0 = 8.1022e-04
Loss = 1.4544e-01, PNorm = 49.3129, GNorm = 0.9691, lr_0 = 8.0966e-04
Loss = 1.6585e-01, PNorm = 49.3439, GNorm = 1.7338, lr_0 = 8.0911e-04
Loss = 1.4360e-01, PNorm = 49.3585, GNorm = 1.3616, lr_0 = 8.0855e-04
Loss = 1.7602e-01, PNorm = 49.3654, GNorm = 1.0200, lr_0 = 8.0800e-04
Loss = 1.4181e-01, PNorm = 49.3894, GNorm = 1.4733, lr_0 = 8.0745e-04
Loss = 1.4553e-01, PNorm = 49.4153, GNorm = 1.1276, lr_0 = 8.0689e-04
Loss = 1.5487e-01, PNorm = 49.4313, GNorm = 1.9809, lr_0 = 8.0634e-04
Loss = 1.5852e-01, PNorm = 49.4481, GNorm = 2.3448, lr_0 = 8.0579e-04
Loss = 1.6542e-01, PNorm = 49.4729, GNorm = 2.6148, lr_0 = 8.0523e-04
Loss = 1.5213e-01, PNorm = 49.4995, GNorm = 0.6305, lr_0 = 8.0468e-04
Loss = 1.6321e-01, PNorm = 49.5158, GNorm = 0.7891, lr_0 = 8.0413e-04
Loss = 1.4940e-01, PNorm = 49.5329, GNorm = 2.4717, lr_0 = 8.0358e-04
Loss = 1.4633e-01, PNorm = 49.5566, GNorm = 1.1379, lr_0 = 8.0303e-04
Loss = 1.5405e-01, PNorm = 49.5769, GNorm = 0.5354, lr_0 = 8.0248e-04
Loss = 1.4599e-01, PNorm = 49.5892, GNorm = 0.7653, lr_0 = 8.0193e-04
Loss = 1.4292e-01, PNorm = 49.6083, GNorm = 1.4003, lr_0 = 8.0138e-04
Loss = 1.5111e-01, PNorm = 49.6313, GNorm = 0.8252, lr_0 = 8.0083e-04
Loss = 1.5837e-01, PNorm = 49.6473, GNorm = 1.0166, lr_0 = 8.0028e-04
Loss = 1.6240e-01, PNorm = 49.6686, GNorm = 1.0160, lr_0 = 7.9974e-04
Loss = 1.4128e-01, PNorm = 49.6898, GNorm = 0.8842, lr_0 = 7.9919e-04
Loss = 1.5385e-01, PNorm = 49.7129, GNorm = 1.1713, lr_0 = 7.9864e-04
Loss = 1.5300e-01, PNorm = 49.7420, GNorm = 0.8692, lr_0 = 7.9809e-04
Loss = 1.5203e-01, PNorm = 49.7669, GNorm = 1.0876, lr_0 = 7.9755e-04
Loss = 1.3904e-01, PNorm = 49.7930, GNorm = 1.1724, lr_0 = 7.9700e-04
Loss = 1.5641e-01, PNorm = 49.8158, GNorm = 1.5865, lr_0 = 7.9645e-04
Loss = 1.6656e-01, PNorm = 49.8328, GNorm = 3.1781, lr_0 = 7.9591e-04
Loss = 1.1805e-01, PNorm = 49.8554, GNorm = 1.1145, lr_0 = 7.9536e-04
Loss = 1.2797e-01, PNorm = 49.8795, GNorm = 1.0044, lr_0 = 7.9482e-04
Loss = 1.6514e-01, PNorm = 49.9026, GNorm = 1.9995, lr_0 = 7.9427e-04
Loss = 1.5697e-01, PNorm = 49.9195, GNorm = 0.9195, lr_0 = 7.9373e-04
Loss = 1.6672e-01, PNorm = 49.9419, GNorm = 1.5037, lr_0 = 7.9319e-04
Loss = 1.2737e-01, PNorm = 49.9664, GNorm = 1.2713, lr_0 = 7.9264e-04
Loss = 1.5646e-01, PNorm = 49.9903, GNorm = 2.5685, lr_0 = 7.9210e-04
Loss = 1.7341e-01, PNorm = 50.0071, GNorm = 2.4732, lr_0 = 7.9156e-04
Loss = 1.6164e-01, PNorm = 50.0293, GNorm = 1.1645, lr_0 = 7.9101e-04
Loss = 1.6196e-01, PNorm = 50.0518, GNorm = 0.7543, lr_0 = 7.9047e-04
Loss = 1.3853e-01, PNorm = 50.0786, GNorm = 1.5379, lr_0 = 7.8993e-04
Loss = 1.5098e-01, PNorm = 50.1055, GNorm = 0.9751, lr_0 = 7.8939e-04
Loss = 1.4524e-01, PNorm = 50.1209, GNorm = 0.9787, lr_0 = 7.8885e-04
Loss = 1.6618e-01, PNorm = 50.1404, GNorm = 2.3781, lr_0 = 7.8831e-04
Loss = 1.6702e-01, PNorm = 50.1646, GNorm = 1.0774, lr_0 = 7.8777e-04
Loss = 1.6292e-01, PNorm = 50.1838, GNorm = 1.6276, lr_0 = 7.8723e-04
Loss = 1.7994e-01, PNorm = 50.2061, GNorm = 1.6533, lr_0 = 7.8669e-04
Loss = 1.5783e-01, PNorm = 50.2353, GNorm = 2.0653, lr_0 = 7.8615e-04
Loss = 1.7976e-01, PNorm = 50.2612, GNorm = 0.9275, lr_0 = 7.8561e-04
Loss = 1.4098e-01, PNorm = 50.2862, GNorm = 0.8211, lr_0 = 7.8507e-04
Loss = 1.7114e-01, PNorm = 50.3092, GNorm = 0.6749, lr_0 = 7.8454e-04
Loss = 1.3519e-01, PNorm = 50.3353, GNorm = 1.1252, lr_0 = 7.8400e-04
Loss = 1.4524e-01, PNorm = 50.3410, GNorm = 0.6500, lr_0 = 7.8346e-04
Loss = 1.2809e-01, PNorm = 50.3552, GNorm = 0.9236, lr_0 = 7.8293e-04
Loss = 1.3390e-01, PNorm = 50.3748, GNorm = 0.5516, lr_0 = 7.8239e-04
Loss = 1.7371e-01, PNorm = 50.3934, GNorm = 2.5982, lr_0 = 7.8185e-04
Loss = 1.4246e-01, PNorm = 50.4219, GNorm = 0.8652, lr_0 = 7.8132e-04
Validation mae = 0.437789
Epoch 5
Loss = 1.1570e-01, PNorm = 50.4389, GNorm = 0.7597, lr_0 = 7.8078e-04
Loss = 1.2096e-01, PNorm = 50.4539, GNorm = 1.3089, lr_0 = 7.8025e-04
Loss = 1.3079e-01, PNorm = 50.4661, GNorm = 1.4944, lr_0 = 7.7971e-04
Loss = 1.3376e-01, PNorm = 50.4883, GNorm = 1.3796, lr_0 = 7.7918e-04
Loss = 1.6455e-01, PNorm = 50.5153, GNorm = 2.1879, lr_0 = 7.7864e-04
Loss = 1.3742e-01, PNorm = 50.5481, GNorm = 1.3030, lr_0 = 7.7811e-04
Loss = 1.3841e-01, PNorm = 50.5719, GNorm = 0.6718, lr_0 = 7.7758e-04
Loss = 1.4322e-01, PNorm = 50.5921, GNorm = 0.9353, lr_0 = 7.7705e-04
Loss = 1.5768e-01, PNorm = 50.6127, GNorm = 1.6337, lr_0 = 7.7651e-04
Loss = 1.6137e-01, PNorm = 50.6277, GNorm = 1.9930, lr_0 = 7.7598e-04
Loss = 1.2874e-01, PNorm = 50.6487, GNorm = 0.7090, lr_0 = 7.7545e-04
Loss = 1.1672e-01, PNorm = 50.6623, GNorm = 0.5015, lr_0 = 7.7492e-04
Loss = 1.4936e-01, PNorm = 50.6712, GNorm = 0.6860, lr_0 = 7.7439e-04
Loss = 1.2702e-01, PNorm = 50.6934, GNorm = 1.0941, lr_0 = 7.7386e-04
Loss = 1.3921e-01, PNorm = 50.7153, GNorm = 1.7280, lr_0 = 7.7333e-04
Loss = 1.3895e-01, PNorm = 50.7293, GNorm = 1.3294, lr_0 = 7.7280e-04
Loss = 1.4361e-01, PNorm = 50.7495, GNorm = 0.9713, lr_0 = 7.7227e-04
Loss = 1.1831e-01, PNorm = 50.7782, GNorm = 0.8932, lr_0 = 7.7174e-04
Loss = 1.3528e-01, PNorm = 50.8058, GNorm = 0.9854, lr_0 = 7.7121e-04
Loss = 1.3775e-01, PNorm = 50.8256, GNorm = 0.7110, lr_0 = 7.7068e-04
Loss = 1.4099e-01, PNorm = 50.8435, GNorm = 0.8926, lr_0 = 7.7015e-04
Loss = 1.3542e-01, PNorm = 50.8601, GNorm = 0.8316, lr_0 = 7.6963e-04
Loss = 1.3612e-01, PNorm = 50.8703, GNorm = 0.8963, lr_0 = 7.6910e-04
Loss = 1.7541e-01, PNorm = 50.8818, GNorm = 2.2425, lr_0 = 7.6857e-04
Loss = 1.4201e-01, PNorm = 50.8997, GNorm = 1.2168, lr_0 = 7.6805e-04
Loss = 1.3213e-01, PNorm = 50.9275, GNorm = 1.2478, lr_0 = 7.6752e-04
Loss = 1.3362e-01, PNorm = 50.9428, GNorm = 0.5855, lr_0 = 7.6699e-04
Loss = 1.3656e-01, PNorm = 50.9564, GNorm = 1.4993, lr_0 = 7.6647e-04
Loss = 1.2854e-01, PNorm = 50.9735, GNorm = 1.8023, lr_0 = 7.6594e-04
Loss = 1.4706e-01, PNorm = 50.9953, GNorm = 1.1034, lr_0 = 7.6542e-04
Loss = 1.1828e-01, PNorm = 51.0190, GNorm = 0.5837, lr_0 = 7.6489e-04
Loss = 1.2630e-01, PNorm = 51.0304, GNorm = 0.9313, lr_0 = 7.6437e-04
Loss = 1.1040e-01, PNorm = 51.0412, GNorm = 0.7236, lr_0 = 7.6385e-04
Loss = 1.2956e-01, PNorm = 51.0537, GNorm = 1.2775, lr_0 = 7.6332e-04
Loss = 1.5594e-01, PNorm = 51.0747, GNorm = 1.4290, lr_0 = 7.6280e-04
Loss = 1.4194e-01, PNorm = 51.0910, GNorm = 1.4342, lr_0 = 7.6228e-04
Loss = 1.0978e-01, PNorm = 51.1112, GNorm = 1.1504, lr_0 = 7.6176e-04
Loss = 1.3436e-01, PNorm = 51.1358, GNorm = 1.4683, lr_0 = 7.6123e-04
Loss = 1.3958e-01, PNorm = 51.1553, GNorm = 0.9053, lr_0 = 7.6071e-04
Loss = 1.4154e-01, PNorm = 51.1699, GNorm = 0.9008, lr_0 = 7.6019e-04
Loss = 1.5508e-01, PNorm = 51.1827, GNorm = 0.8910, lr_0 = 7.5967e-04
Loss = 1.5166e-01, PNorm = 51.2020, GNorm = 0.8055, lr_0 = 7.5915e-04
Loss = 1.2166e-01, PNorm = 51.2219, GNorm = 0.6325, lr_0 = 7.5863e-04
Loss = 1.2532e-01, PNorm = 51.2348, GNorm = 1.0931, lr_0 = 7.5811e-04
Loss = 1.3764e-01, PNorm = 51.2521, GNorm = 1.4183, lr_0 = 7.5759e-04
Loss = 1.5052e-01, PNorm = 51.2712, GNorm = 1.1920, lr_0 = 7.5707e-04
Loss = 1.2957e-01, PNorm = 51.2903, GNorm = 0.8209, lr_0 = 7.5655e-04
Loss = 1.2420e-01, PNorm = 51.3106, GNorm = 1.0611, lr_0 = 7.5603e-04
Loss = 1.3280e-01, PNorm = 51.3326, GNorm = 1.0408, lr_0 = 7.5552e-04
Loss = 1.2946e-01, PNorm = 51.3539, GNorm = 0.6167, lr_0 = 7.5500e-04
Loss = 1.6224e-01, PNorm = 51.3730, GNorm = 3.1783, lr_0 = 7.5448e-04
Loss = 1.4990e-01, PNorm = 51.3929, GNorm = 0.5590, lr_0 = 7.5397e-04
Loss = 1.3183e-01, PNorm = 51.4103, GNorm = 1.1796, lr_0 = 7.5345e-04
Loss = 1.4346e-01, PNorm = 51.4358, GNorm = 0.6213, lr_0 = 7.5293e-04
Loss = 1.5588e-01, PNorm = 51.4648, GNorm = 1.6638, lr_0 = 7.5242e-04
Loss = 1.4592e-01, PNorm = 51.4901, GNorm = 1.2789, lr_0 = 7.5190e-04
Loss = 1.2824e-01, PNorm = 51.5158, GNorm = 0.5384, lr_0 = 7.5139e-04
Loss = 1.5280e-01, PNorm = 51.5337, GNorm = 0.7628, lr_0 = 7.5087e-04
Loss = 1.3441e-01, PNorm = 51.5499, GNorm = 2.4893, lr_0 = 7.5036e-04
Loss = 1.5196e-01, PNorm = 51.5693, GNorm = 1.7743, lr_0 = 7.4984e-04
Loss = 1.4441e-01, PNorm = 51.5840, GNorm = 1.5301, lr_0 = 7.4933e-04
Loss = 1.3633e-01, PNorm = 51.6026, GNorm = 1.4429, lr_0 = 7.4882e-04
Loss = 1.4969e-01, PNorm = 51.6193, GNorm = 0.6681, lr_0 = 7.4830e-04
Loss = 1.4032e-01, PNorm = 51.6384, GNorm = 1.7553, lr_0 = 7.4779e-04
Loss = 1.2420e-01, PNorm = 51.6589, GNorm = 0.7194, lr_0 = 7.4728e-04
Loss = 1.5615e-01, PNorm = 51.6812, GNorm = 0.8557, lr_0 = 7.4677e-04
Loss = 1.4694e-01, PNorm = 51.7033, GNorm = 0.7773, lr_0 = 7.4625e-04
Loss = 1.2945e-01, PNorm = 51.7239, GNorm = 0.9329, lr_0 = 7.4574e-04
Loss = 1.2307e-01, PNorm = 51.7349, GNorm = 0.6152, lr_0 = 7.4523e-04
Loss = 1.5782e-01, PNorm = 51.7529, GNorm = 0.9370, lr_0 = 7.4472e-04
Loss = 1.2908e-01, PNorm = 51.7729, GNorm = 0.7888, lr_0 = 7.4421e-04
Loss = 1.5719e-01, PNorm = 51.7968, GNorm = 1.1224, lr_0 = 7.4370e-04
Loss = 1.6562e-01, PNorm = 51.8224, GNorm = 3.4161, lr_0 = 7.4319e-04
Loss = 1.4935e-01, PNorm = 51.8508, GNorm = 1.0176, lr_0 = 7.4268e-04
Loss = 1.7565e-01, PNorm = 51.8908, GNorm = 2.1892, lr_0 = 7.4217e-04
Loss = 1.5099e-01, PNorm = 51.9226, GNorm = 1.3073, lr_0 = 7.4167e-04
Loss = 1.3695e-01, PNorm = 51.9508, GNorm = 0.5501, lr_0 = 7.4116e-04
Loss = 1.2837e-01, PNorm = 51.9710, GNorm = 1.1129, lr_0 = 7.4065e-04
Loss = 1.1315e-01, PNorm = 51.9754, GNorm = 0.6183, lr_0 = 7.4014e-04
Loss = 1.3061e-01, PNorm = 51.9868, GNorm = 1.0217, lr_0 = 7.3964e-04
Loss = 1.3629e-01, PNorm = 52.0133, GNorm = 0.9177, lr_0 = 7.3913e-04
Loss = 1.3617e-01, PNorm = 52.0378, GNorm = 0.5705, lr_0 = 7.3862e-04
Loss = 1.4796e-01, PNorm = 52.0524, GNorm = 1.5313, lr_0 = 7.3812e-04
Loss = 1.5045e-01, PNorm = 52.0735, GNorm = 2.3483, lr_0 = 7.3761e-04
Loss = 1.4405e-01, PNorm = 52.0974, GNorm = 2.3930, lr_0 = 7.3711e-04
Loss = 1.5924e-01, PNorm = 52.1123, GNorm = 0.6078, lr_0 = 7.3660e-04
Loss = 1.2537e-01, PNorm = 52.1368, GNorm = 0.6463, lr_0 = 7.3610e-04
Loss = 1.2560e-01, PNorm = 52.1542, GNorm = 0.5610, lr_0 = 7.3559e-04
Loss = 1.2628e-01, PNorm = 52.1671, GNorm = 1.0203, lr_0 = 7.3509e-04
Loss = 1.4349e-01, PNorm = 52.1865, GNorm = 1.2271, lr_0 = 7.3458e-04
Loss = 1.4174e-01, PNorm = 52.2093, GNorm = 0.8434, lr_0 = 7.3408e-04
Loss = 1.1655e-01, PNorm = 52.2301, GNorm = 0.9434, lr_0 = 7.3358e-04
Loss = 1.2992e-01, PNorm = 52.2398, GNorm = 1.4463, lr_0 = 7.3308e-04
Loss = 1.5098e-01, PNorm = 52.2604, GNorm = 1.9873, lr_0 = 7.3257e-04
Loss = 1.4929e-01, PNorm = 52.2872, GNorm = 0.7623, lr_0 = 7.3207e-04
Loss = 1.4846e-01, PNorm = 52.3154, GNorm = 1.7640, lr_0 = 7.3157e-04
Loss = 1.3517e-01, PNorm = 52.3346, GNorm = 1.0688, lr_0 = 7.3107e-04
Loss = 1.4943e-01, PNorm = 52.3563, GNorm = 1.8932, lr_0 = 7.3057e-04
Loss = 1.4304e-01, PNorm = 52.3779, GNorm = 1.0942, lr_0 = 7.3007e-04
Loss = 1.1778e-01, PNorm = 52.4086, GNorm = 0.6489, lr_0 = 7.2957e-04
Loss = 1.3164e-01, PNorm = 52.4252, GNorm = 0.8212, lr_0 = 7.2907e-04
Loss = 1.1868e-01, PNorm = 52.4391, GNorm = 0.8152, lr_0 = 7.2857e-04
Loss = 1.4782e-01, PNorm = 52.4520, GNorm = 2.0286, lr_0 = 7.2807e-04
Loss = 1.2035e-01, PNorm = 52.4621, GNorm = 0.9784, lr_0 = 7.2757e-04
Loss = 1.5042e-01, PNorm = 52.4805, GNorm = 1.3717, lr_0 = 7.2707e-04
Loss = 1.5299e-01, PNorm = 52.5016, GNorm = 1.0407, lr_0 = 7.2657e-04
Loss = 1.4015e-01, PNorm = 52.5216, GNorm = 2.0034, lr_0 = 7.2608e-04
Loss = 1.6403e-01, PNorm = 52.5374, GNorm = 1.1524, lr_0 = 7.2558e-04
Loss = 1.2491e-01, PNorm = 52.5577, GNorm = 1.5012, lr_0 = 7.2508e-04
Loss = 1.4833e-01, PNorm = 52.5779, GNorm = 0.7675, lr_0 = 7.2458e-04
Loss = 1.2875e-01, PNorm = 52.5884, GNorm = 0.6244, lr_0 = 7.2409e-04
Loss = 1.4573e-01, PNorm = 52.6077, GNorm = 1.6883, lr_0 = 7.2359e-04
Loss = 1.4627e-01, PNorm = 52.6313, GNorm = 0.7634, lr_0 = 7.2310e-04
Loss = 1.3868e-01, PNorm = 52.6518, GNorm = 0.7098, lr_0 = 7.2260e-04
Loss = 1.4198e-01, PNorm = 52.6826, GNorm = 1.2238, lr_0 = 7.2211e-04
Loss = 1.3611e-01, PNorm = 52.7055, GNorm = 0.7120, lr_0 = 7.2161e-04
Loss = 1.4093e-01, PNorm = 52.7227, GNorm = 1.7599, lr_0 = 7.2112e-04
Loss = 1.3945e-01, PNorm = 52.7382, GNorm = 1.6206, lr_0 = 7.2062e-04
Loss = 1.3579e-01, PNorm = 52.7595, GNorm = 0.9069, lr_0 = 7.2013e-04
Loss = 1.3064e-01, PNorm = 52.7813, GNorm = 1.5052, lr_0 = 7.1964e-04
Validation mae = 0.442674
Epoch 6
Loss = 1.3174e-01, PNorm = 52.7997, GNorm = 0.9452, lr_0 = 7.1914e-04
Loss = 1.2724e-01, PNorm = 52.8176, GNorm = 1.9716, lr_0 = 7.1865e-04
Loss = 1.2117e-01, PNorm = 52.8411, GNorm = 1.0623, lr_0 = 7.1816e-04
Loss = 1.0848e-01, PNorm = 52.8618, GNorm = 0.5924, lr_0 = 7.1767e-04
Loss = 1.0879e-01, PNorm = 52.8736, GNorm = 0.5159, lr_0 = 7.1717e-04
Loss = 1.3668e-01, PNorm = 52.8869, GNorm = 0.5775, lr_0 = 7.1668e-04
Loss = 1.2495e-01, PNorm = 52.9017, GNorm = 0.5611, lr_0 = 7.1619e-04
Loss = 1.2149e-01, PNorm = 52.9176, GNorm = 0.9691, lr_0 = 7.1570e-04
Loss = 1.2070e-01, PNorm = 52.9382, GNorm = 1.1008, lr_0 = 7.1521e-04
Loss = 1.1657e-01, PNorm = 52.9678, GNorm = 2.4741, lr_0 = 7.1472e-04
Loss = 1.2924e-01, PNorm = 52.9901, GNorm = 1.1193, lr_0 = 7.1423e-04
Loss = 1.3055e-01, PNorm = 53.0144, GNorm = 0.7274, lr_0 = 7.1374e-04
Loss = 1.2360e-01, PNorm = 53.0361, GNorm = 0.9591, lr_0 = 7.1325e-04
Loss = 1.2770e-01, PNorm = 53.0519, GNorm = 0.5840, lr_0 = 7.1277e-04
Loss = 1.1804e-01, PNorm = 53.0692, GNorm = 1.1486, lr_0 = 7.1228e-04
Loss = 1.3818e-01, PNorm = 53.0927, GNorm = 1.4902, lr_0 = 7.1179e-04
Loss = 1.2541e-01, PNorm = 53.1146, GNorm = 0.9387, lr_0 = 7.1130e-04
Loss = 1.1720e-01, PNorm = 53.1280, GNorm = 0.9867, lr_0 = 7.1081e-04
Loss = 1.3790e-01, PNorm = 53.1469, GNorm = 0.5322, lr_0 = 7.1033e-04
Loss = 1.2602e-01, PNorm = 53.1712, GNorm = 0.6913, lr_0 = 7.0984e-04
Loss = 1.3658e-01, PNorm = 53.1902, GNorm = 2.5432, lr_0 = 7.0935e-04
Loss = 1.2760e-01, PNorm = 53.2038, GNorm = 0.9645, lr_0 = 7.0887e-04
Loss = 1.1555e-01, PNorm = 53.2195, GNorm = 0.9144, lr_0 = 7.0838e-04
Loss = 1.2561e-01, PNorm = 53.2405, GNorm = 0.5814, lr_0 = 7.0790e-04
Loss = 1.2638e-01, PNorm = 53.2530, GNorm = 0.8654, lr_0 = 7.0741e-04
Loss = 1.3097e-01, PNorm = 53.2774, GNorm = 1.0119, lr_0 = 7.0693e-04
Loss = 1.5134e-01, PNorm = 53.3010, GNorm = 2.4175, lr_0 = 7.0644e-04
Loss = 1.5794e-01, PNorm = 53.3275, GNorm = 1.5204, lr_0 = 7.0596e-04
Loss = 1.2778e-01, PNorm = 53.3616, GNorm = 0.9875, lr_0 = 7.0548e-04
Loss = 1.3448e-01, PNorm = 53.3942, GNorm = 0.8494, lr_0 = 7.0499e-04
Loss = 1.3162e-01, PNorm = 53.4109, GNorm = 1.3251, lr_0 = 7.0451e-04
Loss = 1.2935e-01, PNorm = 53.4260, GNorm = 1.1294, lr_0 = 7.0403e-04
Loss = 1.2361e-01, PNorm = 53.4485, GNorm = 1.4598, lr_0 = 7.0354e-04
Loss = 1.1931e-01, PNorm = 53.4726, GNorm = 1.0762, lr_0 = 7.0306e-04
Loss = 1.4381e-01, PNorm = 53.4910, GNorm = 0.8754, lr_0 = 7.0258e-04
Loss = 1.3988e-01, PNorm = 53.5066, GNorm = 0.7424, lr_0 = 7.0210e-04
Loss = 1.0813e-01, PNorm = 53.5235, GNorm = 1.0858, lr_0 = 7.0162e-04
Loss = 1.3514e-01, PNorm = 53.5431, GNorm = 1.0286, lr_0 = 7.0114e-04
Loss = 1.1515e-01, PNorm = 53.5642, GNorm = 0.9567, lr_0 = 7.0066e-04
Loss = 1.2473e-01, PNorm = 53.5765, GNorm = 0.8276, lr_0 = 7.0018e-04
Loss = 1.2918e-01, PNorm = 53.5863, GNorm = 1.4480, lr_0 = 6.9970e-04
Loss = 1.3023e-01, PNorm = 53.5984, GNorm = 0.7278, lr_0 = 6.9922e-04
Loss = 1.1297e-01, PNorm = 53.6138, GNorm = 0.7735, lr_0 = 6.9874e-04
Loss = 1.2692e-01, PNorm = 53.6324, GNorm = 0.7768, lr_0 = 6.9826e-04
Loss = 1.1706e-01, PNorm = 53.6555, GNorm = 1.2157, lr_0 = 6.9778e-04
Loss = 1.1330e-01, PNorm = 53.6713, GNorm = 0.6084, lr_0 = 6.9730e-04
Loss = 1.2061e-01, PNorm = 53.6894, GNorm = 1.2617, lr_0 = 6.9683e-04
Loss = 1.2270e-01, PNorm = 53.7092, GNorm = 0.9337, lr_0 = 6.9635e-04
Loss = 1.2296e-01, PNorm = 53.7231, GNorm = 0.9624, lr_0 = 6.9587e-04
Loss = 1.2996e-01, PNorm = 53.7373, GNorm = 0.9215, lr_0 = 6.9540e-04
Loss = 1.1754e-01, PNorm = 53.7520, GNorm = 0.6244, lr_0 = 6.9492e-04
Loss = 9.7426e-02, PNorm = 53.7688, GNorm = 0.9367, lr_0 = 6.9444e-04
Loss = 1.2958e-01, PNorm = 53.7857, GNorm = 1.0302, lr_0 = 6.9397e-04
Loss = 1.3306e-01, PNorm = 53.8116, GNorm = 1.6907, lr_0 = 6.9349e-04
Loss = 1.4149e-01, PNorm = 53.8336, GNorm = 1.5929, lr_0 = 6.9302e-04
Loss = 1.1897e-01, PNorm = 53.8548, GNorm = 1.5156, lr_0 = 6.9254e-04
Loss = 1.0096e-01, PNorm = 53.8770, GNorm = 0.7570, lr_0 = 6.9207e-04
Loss = 1.2821e-01, PNorm = 53.8954, GNorm = 1.4110, lr_0 = 6.9159e-04
Loss = 1.3098e-01, PNorm = 53.9057, GNorm = 0.4540, lr_0 = 6.9112e-04
Loss = 1.2377e-01, PNorm = 53.9174, GNorm = 0.8207, lr_0 = 6.9065e-04
Loss = 1.3231e-01, PNorm = 53.9283, GNorm = 0.8968, lr_0 = 6.9017e-04
Loss = 1.4440e-01, PNorm = 53.9407, GNorm = 1.1361, lr_0 = 6.8970e-04
Loss = 1.3306e-01, PNorm = 53.9604, GNorm = 1.5434, lr_0 = 6.8923e-04
Loss = 1.3556e-01, PNorm = 53.9794, GNorm = 0.5023, lr_0 = 6.8876e-04
Loss = 1.0674e-01, PNorm = 53.9956, GNorm = 0.6256, lr_0 = 6.8828e-04
Loss = 1.2144e-01, PNorm = 54.0094, GNorm = 1.1393, lr_0 = 6.8781e-04
Loss = 1.2679e-01, PNorm = 54.0233, GNorm = 0.7587, lr_0 = 6.8734e-04
Loss = 1.2630e-01, PNorm = 54.0443, GNorm = 1.5464, lr_0 = 6.8687e-04
Loss = 1.3498e-01, PNorm = 54.0637, GNorm = 1.6072, lr_0 = 6.8640e-04
Loss = 1.2017e-01, PNorm = 54.0857, GNorm = 0.6893, lr_0 = 6.8593e-04
Loss = 1.3079e-01, PNorm = 54.1043, GNorm = 1.7348, lr_0 = 6.8546e-04
Loss = 1.2329e-01, PNorm = 54.1231, GNorm = 0.5968, lr_0 = 6.8499e-04
Loss = 1.3777e-01, PNorm = 54.1380, GNorm = 0.8475, lr_0 = 6.8452e-04
Loss = 1.3521e-01, PNorm = 54.1499, GNorm = 0.7101, lr_0 = 6.8405e-04
Loss = 1.1464e-01, PNorm = 54.1611, GNorm = 0.9353, lr_0 = 6.8358e-04
Loss = 1.2268e-01, PNorm = 54.1771, GNorm = 1.3443, lr_0 = 6.8312e-04
Loss = 1.3333e-01, PNorm = 54.1922, GNorm = 0.9262, lr_0 = 6.8265e-04
Loss = 1.3096e-01, PNorm = 54.2112, GNorm = 1.1079, lr_0 = 6.8218e-04
Loss = 1.1083e-01, PNorm = 54.2266, GNorm = 0.6678, lr_0 = 6.8171e-04
Loss = 1.3081e-01, PNorm = 54.2353, GNorm = 0.7741, lr_0 = 6.8125e-04
Loss = 1.3849e-01, PNorm = 54.2507, GNorm = 0.7561, lr_0 = 6.8078e-04
Loss = 1.2550e-01, PNorm = 54.2744, GNorm = 0.8634, lr_0 = 6.8031e-04
Loss = 1.1701e-01, PNorm = 54.2909, GNorm = 1.1039, lr_0 = 6.7985e-04
Loss = 1.2230e-01, PNorm = 54.3010, GNorm = 1.1882, lr_0 = 6.7938e-04
Loss = 1.0836e-01, PNorm = 54.3172, GNorm = 0.6134, lr_0 = 6.7892e-04
Loss = 1.6884e-01, PNorm = 54.3411, GNorm = 2.3377, lr_0 = 6.7845e-04
Loss = 1.4200e-01, PNorm = 54.3653, GNorm = 1.1004, lr_0 = 6.7799e-04
Loss = 1.5024e-01, PNorm = 54.3969, GNorm = 0.9355, lr_0 = 6.7752e-04
Loss = 1.3122e-01, PNorm = 54.4219, GNorm = 0.5736, lr_0 = 6.7706e-04
Loss = 1.1222e-01, PNorm = 54.4462, GNorm = 1.1381, lr_0 = 6.7659e-04
Loss = 1.3140e-01, PNorm = 54.4658, GNorm = 2.2456, lr_0 = 6.7613e-04
Loss = 1.1414e-01, PNorm = 54.4819, GNorm = 0.8750, lr_0 = 6.7567e-04
Loss = 1.3336e-01, PNorm = 54.4924, GNorm = 1.8564, lr_0 = 6.7520e-04
Loss = 1.2946e-01, PNorm = 54.5075, GNorm = 1.2687, lr_0 = 6.7474e-04
Loss = 1.2914e-01, PNorm = 54.5232, GNorm = 0.8641, lr_0 = 6.7428e-04
Loss = 1.0074e-01, PNorm = 54.5355, GNorm = 0.5988, lr_0 = 6.7382e-04
Loss = 1.2306e-01, PNorm = 54.5482, GNorm = 1.3346, lr_0 = 6.7335e-04
Loss = 1.2760e-01, PNorm = 54.5662, GNorm = 1.6939, lr_0 = 6.7289e-04
Loss = 1.3717e-01, PNorm = 54.5853, GNorm = 1.3225, lr_0 = 6.7243e-04
Loss = 1.1734e-01, PNorm = 54.6071, GNorm = 0.8552, lr_0 = 6.7197e-04
Loss = 1.1700e-01, PNorm = 54.6248, GNorm = 0.9797, lr_0 = 6.7151e-04
Loss = 1.1753e-01, PNorm = 54.6400, GNorm = 0.6177, lr_0 = 6.7105e-04
Loss = 1.1857e-01, PNorm = 54.6467, GNorm = 0.8472, lr_0 = 6.7059e-04
Loss = 1.4988e-01, PNorm = 54.6592, GNorm = 1.6689, lr_0 = 6.7013e-04
Loss = 1.4484e-01, PNorm = 54.6793, GNorm = 1.6421, lr_0 = 6.6967e-04
Loss = 1.5149e-01, PNorm = 54.7042, GNorm = 0.6936, lr_0 = 6.6921e-04
Loss = 1.2016e-01, PNorm = 54.7248, GNorm = 0.8074, lr_0 = 6.6876e-04
Loss = 1.2648e-01, PNorm = 54.7406, GNorm = 0.5690, lr_0 = 6.6830e-04
Loss = 1.4151e-01, PNorm = 54.7631, GNorm = 1.2199, lr_0 = 6.6784e-04
Loss = 1.2988e-01, PNorm = 54.7828, GNorm = 1.0331, lr_0 = 6.6738e-04
Loss = 1.1954e-01, PNorm = 54.8053, GNorm = 0.9610, lr_0 = 6.6693e-04
Loss = 1.3218e-01, PNorm = 54.8209, GNorm = 1.2962, lr_0 = 6.6647e-04
Loss = 1.3118e-01, PNorm = 54.8415, GNorm = 1.3404, lr_0 = 6.6601e-04
Loss = 1.3182e-01, PNorm = 54.8588, GNorm = 0.7959, lr_0 = 6.6556e-04
Loss = 1.2147e-01, PNorm = 54.8726, GNorm = 0.8988, lr_0 = 6.6510e-04
Loss = 1.3689e-01, PNorm = 54.8805, GNorm = 1.0024, lr_0 = 6.6464e-04
Loss = 1.3199e-01, PNorm = 54.8841, GNorm = 0.8286, lr_0 = 6.6419e-04
Loss = 1.5834e-01, PNorm = 54.8962, GNorm = 0.8417, lr_0 = 6.6373e-04
Loss = 1.4981e-01, PNorm = 54.9138, GNorm = 0.6686, lr_0 = 6.6328e-04
Loss = 1.0819e-01, PNorm = 54.9296, GNorm = 0.7079, lr_0 = 6.6282e-04
Validation mae = 0.456709
Epoch 7
Loss = 1.2764e-01, PNorm = 54.9461, GNorm = 1.4274, lr_0 = 6.6237e-04
Loss = 1.1114e-01, PNorm = 54.9648, GNorm = 1.0460, lr_0 = 6.6192e-04
Loss = 1.1385e-01, PNorm = 54.9840, GNorm = 1.4430, lr_0 = 6.6146e-04
Loss = 1.4411e-01, PNorm = 55.0042, GNorm = 1.4691, lr_0 = 6.6101e-04
Loss = 1.4128e-01, PNorm = 55.0267, GNorm = 1.9842, lr_0 = 6.6056e-04
Loss = 1.3029e-01, PNorm = 55.0448, GNorm = 1.5381, lr_0 = 6.6011e-04
Loss = 1.0935e-01, PNorm = 55.0631, GNorm = 0.8552, lr_0 = 6.5965e-04
Loss = 1.3160e-01, PNorm = 55.0849, GNorm = 1.6879, lr_0 = 6.5920e-04
Loss = 1.1615e-01, PNorm = 55.1074, GNorm = 0.9093, lr_0 = 6.5875e-04
Loss = 1.1457e-01, PNorm = 55.1258, GNorm = 0.5624, lr_0 = 6.5830e-04
Loss = 1.1238e-01, PNorm = 55.1413, GNorm = 0.7267, lr_0 = 6.5785e-04
Loss = 1.1703e-01, PNorm = 55.1590, GNorm = 0.5139, lr_0 = 6.5740e-04
Loss = 1.2401e-01, PNorm = 55.1781, GNorm = 0.7906, lr_0 = 6.5695e-04
Loss = 9.9431e-02, PNorm = 55.1999, GNorm = 0.5677, lr_0 = 6.5650e-04
Loss = 1.1214e-01, PNorm = 55.2152, GNorm = 0.8317, lr_0 = 6.5605e-04
Loss = 1.0559e-01, PNorm = 55.2306, GNorm = 0.6815, lr_0 = 6.5560e-04
Loss = 1.1551e-01, PNorm = 55.2497, GNorm = 0.6478, lr_0 = 6.5515e-04
Loss = 1.2337e-01, PNorm = 55.2576, GNorm = 0.6867, lr_0 = 6.5470e-04
Loss = 1.0290e-01, PNorm = 55.2677, GNorm = 1.1892, lr_0 = 6.5425e-04
Loss = 1.1327e-01, PNorm = 55.2836, GNorm = 1.2137, lr_0 = 6.5380e-04
Loss = 1.0892e-01, PNorm = 55.2970, GNorm = 0.8565, lr_0 = 6.5335e-04
Loss = 1.1710e-01, PNorm = 55.3130, GNorm = 0.7250, lr_0 = 6.5291e-04
Loss = 1.0963e-01, PNorm = 55.3323, GNorm = 0.9396, lr_0 = 6.5246e-04
Loss = 1.0935e-01, PNorm = 55.3419, GNorm = 0.8367, lr_0 = 6.5201e-04
Loss = 1.0630e-01, PNorm = 55.3495, GNorm = 0.9402, lr_0 = 6.5157e-04
Loss = 1.1407e-01, PNorm = 55.3668, GNorm = 0.6719, lr_0 = 6.5112e-04
Loss = 1.3302e-01, PNorm = 55.3877, GNorm = 0.9373, lr_0 = 6.5067e-04
Loss = 1.0130e-01, PNorm = 55.4101, GNorm = 0.7605, lr_0 = 6.5023e-04
Loss = 1.1392e-01, PNorm = 55.4291, GNorm = 1.2275, lr_0 = 6.4978e-04
Loss = 1.0569e-01, PNorm = 55.4526, GNorm = 0.9000, lr_0 = 6.4934e-04
Loss = 1.1388e-01, PNorm = 55.4698, GNorm = 0.5093, lr_0 = 6.4889e-04
Loss = 1.0984e-01, PNorm = 55.4872, GNorm = 0.6222, lr_0 = 6.4845e-04
Loss = 1.1272e-01, PNorm = 55.5021, GNorm = 0.7342, lr_0 = 6.4800e-04
Loss = 1.1168e-01, PNorm = 55.5179, GNorm = 0.7659, lr_0 = 6.4756e-04
Loss = 1.0094e-01, PNorm = 55.5305, GNorm = 0.6296, lr_0 = 6.4712e-04
Loss = 1.1093e-01, PNorm = 55.5393, GNorm = 0.7835, lr_0 = 6.4667e-04
Loss = 9.7304e-02, PNorm = 55.5602, GNorm = 0.6304, lr_0 = 6.4623e-04
Loss = 1.2917e-01, PNorm = 55.5755, GNorm = 0.8998, lr_0 = 6.4579e-04
Loss = 1.1689e-01, PNorm = 55.5943, GNorm = 0.5960, lr_0 = 6.4534e-04
Loss = 1.1256e-01, PNorm = 55.6131, GNorm = 0.5150, lr_0 = 6.4490e-04
Loss = 1.1717e-01, PNorm = 55.6254, GNorm = 0.6652, lr_0 = 6.4446e-04
Loss = 1.0949e-01, PNorm = 55.6391, GNorm = 0.6936, lr_0 = 6.4402e-04
Loss = 1.1254e-01, PNorm = 55.6533, GNorm = 1.2432, lr_0 = 6.4358e-04
Loss = 1.2158e-01, PNorm = 55.6662, GNorm = 0.5557, lr_0 = 6.4314e-04
Loss = 1.2976e-01, PNorm = 55.6790, GNorm = 0.6089, lr_0 = 6.4270e-04
Loss = 1.1308e-01, PNorm = 55.6968, GNorm = 0.9335, lr_0 = 6.4226e-04
Loss = 1.0733e-01, PNorm = 55.7189, GNorm = 0.7535, lr_0 = 6.4182e-04
Loss = 1.0974e-01, PNorm = 55.7294, GNorm = 1.0197, lr_0 = 6.4138e-04
Loss = 1.2888e-01, PNorm = 55.7452, GNorm = 0.6395, lr_0 = 6.4094e-04
Loss = 1.2967e-01, PNorm = 55.7652, GNorm = 2.0844, lr_0 = 6.4050e-04
Loss = 1.2353e-01, PNorm = 55.7805, GNorm = 0.8951, lr_0 = 6.4006e-04
Loss = 9.9486e-02, PNorm = 55.7946, GNorm = 0.6579, lr_0 = 6.3962e-04
Loss = 1.1516e-01, PNorm = 55.8088, GNorm = 0.8238, lr_0 = 6.3918e-04
Loss = 1.0759e-01, PNorm = 55.8255, GNorm = 0.6302, lr_0 = 6.3874e-04
Loss = 1.0026e-01, PNorm = 55.8463, GNorm = 0.7252, lr_0 = 6.3831e-04
Loss = 1.0173e-01, PNorm = 55.8654, GNorm = 0.5631, lr_0 = 6.3787e-04
Loss = 1.0728e-01, PNorm = 55.8843, GNorm = 0.6702, lr_0 = 6.3743e-04
Loss = 1.0483e-01, PNorm = 55.8989, GNorm = 0.8911, lr_0 = 6.3700e-04
Loss = 1.1752e-01, PNorm = 55.9076, GNorm = 0.6225, lr_0 = 6.3656e-04
Loss = 1.0640e-01, PNorm = 55.9168, GNorm = 0.6124, lr_0 = 6.3612e-04
Loss = 1.2573e-01, PNorm = 55.9321, GNorm = 0.9804, lr_0 = 6.3569e-04
Loss = 1.1025e-01, PNorm = 55.9509, GNorm = 0.8041, lr_0 = 6.3525e-04
Loss = 9.1596e-02, PNorm = 55.9657, GNorm = 0.8933, lr_0 = 6.3482e-04
Loss = 1.0501e-01, PNorm = 55.9784, GNorm = 0.5987, lr_0 = 6.3438e-04
Loss = 1.1561e-01, PNorm = 55.9922, GNorm = 0.8660, lr_0 = 6.3395e-04
Loss = 1.2998e-01, PNorm = 56.0099, GNorm = 2.0821, lr_0 = 6.3351e-04
Loss = 1.2330e-01, PNorm = 56.0279, GNorm = 0.5431, lr_0 = 6.3308e-04
Loss = 1.2865e-01, PNorm = 56.0429, GNorm = 0.7326, lr_0 = 6.3265e-04
Loss = 1.3890e-01, PNorm = 56.0638, GNorm = 1.1437, lr_0 = 6.3221e-04
Loss = 1.1841e-01, PNorm = 56.0792, GNorm = 1.2730, lr_0 = 6.3178e-04
Loss = 1.2466e-01, PNorm = 56.0936, GNorm = 1.1107, lr_0 = 6.3135e-04
Loss = 1.2425e-01, PNorm = 56.1082, GNorm = 0.7984, lr_0 = 6.3091e-04
Loss = 1.4020e-01, PNorm = 56.1246, GNorm = 0.5663, lr_0 = 6.3048e-04
Loss = 1.2831e-01, PNorm = 56.1438, GNorm = 0.7890, lr_0 = 6.3005e-04
Loss = 1.1238e-01, PNorm = 56.1633, GNorm = 0.6758, lr_0 = 6.2962e-04
Loss = 1.1104e-01, PNorm = 56.1776, GNorm = 0.6640, lr_0 = 6.2919e-04
Loss = 1.1961e-01, PNorm = 56.1914, GNorm = 1.7143, lr_0 = 6.2876e-04
Loss = 1.1600e-01, PNorm = 56.2092, GNorm = 0.7701, lr_0 = 6.2833e-04
Loss = 1.1133e-01, PNorm = 56.2239, GNorm = 0.7245, lr_0 = 6.2789e-04
Loss = 1.1168e-01, PNorm = 56.2385, GNorm = 0.8130, lr_0 = 6.2746e-04
Loss = 1.2081e-01, PNorm = 56.2554, GNorm = 0.8265, lr_0 = 6.2703e-04
Loss = 1.1420e-01, PNorm = 56.2728, GNorm = 1.0732, lr_0 = 6.2661e-04
Loss = 1.1081e-01, PNorm = 56.2864, GNorm = 0.7255, lr_0 = 6.2618e-04
Loss = 1.1081e-01, PNorm = 56.2991, GNorm = 0.8751, lr_0 = 6.2575e-04
Loss = 1.2310e-01, PNorm = 56.3181, GNorm = 0.7298, lr_0 = 6.2532e-04
Loss = 1.2185e-01, PNorm = 56.3365, GNorm = 1.1628, lr_0 = 6.2489e-04
Loss = 1.2275e-01, PNorm = 56.3487, GNorm = 0.5409, lr_0 = 6.2446e-04
Loss = 1.2480e-01, PNorm = 56.3700, GNorm = 1.2738, lr_0 = 6.2403e-04
Loss = 1.0945e-01, PNorm = 56.3898, GNorm = 0.7669, lr_0 = 6.2361e-04
Loss = 1.0568e-01, PNorm = 56.4006, GNorm = 1.0495, lr_0 = 6.2318e-04
Loss = 1.1703e-01, PNorm = 56.4150, GNorm = 0.4341, lr_0 = 6.2275e-04
Loss = 1.1731e-01, PNorm = 56.4313, GNorm = 0.6496, lr_0 = 6.2233e-04
Loss = 1.4378e-01, PNorm = 56.4475, GNorm = 0.9192, lr_0 = 6.2190e-04
Loss = 1.2542e-01, PNorm = 56.4633, GNorm = 0.9203, lr_0 = 6.2147e-04
Loss = 1.3290e-01, PNorm = 56.4856, GNorm = 2.1014, lr_0 = 6.2105e-04
Loss = 1.3932e-01, PNorm = 56.5094, GNorm = 1.4945, lr_0 = 6.2062e-04
Loss = 1.1694e-01, PNorm = 56.5339, GNorm = 1.1705, lr_0 = 6.2020e-04
Loss = 1.1937e-01, PNorm = 56.5514, GNorm = 1.3767, lr_0 = 6.1977e-04
Loss = 1.2587e-01, PNorm = 56.5721, GNorm = 0.7798, lr_0 = 6.1935e-04
Loss = 1.2687e-01, PNorm = 56.5921, GNorm = 0.6208, lr_0 = 6.1892e-04
Loss = 1.0850e-01, PNorm = 56.6054, GNorm = 0.5416, lr_0 = 6.1850e-04
Loss = 1.0034e-01, PNorm = 56.6179, GNorm = 0.7100, lr_0 = 6.1808e-04
Loss = 1.2284e-01, PNorm = 56.6294, GNorm = 0.9406, lr_0 = 6.1765e-04
Loss = 1.0525e-01, PNorm = 56.6408, GNorm = 0.6941, lr_0 = 6.1723e-04
Loss = 1.3395e-01, PNorm = 56.6533, GNorm = 0.7705, lr_0 = 6.1681e-04
Loss = 1.0588e-01, PNorm = 56.6657, GNorm = 0.8845, lr_0 = 6.1638e-04
Loss = 1.1775e-01, PNorm = 56.6825, GNorm = 1.1442, lr_0 = 6.1596e-04
Loss = 1.2746e-01, PNorm = 56.7012, GNorm = 2.3787, lr_0 = 6.1554e-04
Loss = 1.4763e-01, PNorm = 56.7240, GNorm = 1.2817, lr_0 = 6.1512e-04
Loss = 1.1858e-01, PNorm = 56.7462, GNorm = 1.0279, lr_0 = 6.1470e-04
Loss = 1.2834e-01, PNorm = 56.7620, GNorm = 0.7003, lr_0 = 6.1428e-04
Loss = 1.1953e-01, PNorm = 56.7752, GNorm = 0.9490, lr_0 = 6.1385e-04
Loss = 1.1964e-01, PNorm = 56.7894, GNorm = 0.7559, lr_0 = 6.1343e-04
Loss = 1.2167e-01, PNorm = 56.8090, GNorm = 0.7105, lr_0 = 6.1301e-04
Loss = 1.1203e-01, PNorm = 56.8276, GNorm = 0.5853, lr_0 = 6.1259e-04
Loss = 1.0852e-01, PNorm = 56.8427, GNorm = 0.6712, lr_0 = 6.1217e-04
Loss = 1.1682e-01, PNorm = 56.8551, GNorm = 0.7463, lr_0 = 6.1175e-04
Loss = 9.8376e-02, PNorm = 56.8654, GNorm = 0.4517, lr_0 = 6.1134e-04
Loss = 1.2224e-01, PNorm = 56.8792, GNorm = 1.0432, lr_0 = 6.1092e-04
Loss = 1.2704e-01, PNorm = 56.8920, GNorm = 0.9005, lr_0 = 6.1050e-04
Validation mae = 0.443685
Epoch 8
Loss = 9.8038e-02, PNorm = 56.9101, GNorm = 0.8390, lr_0 = 6.1008e-04
Loss = 9.6310e-02, PNorm = 56.9320, GNorm = 0.9466, lr_0 = 6.0966e-04
Loss = 1.3332e-01, PNorm = 56.9554, GNorm = 0.7641, lr_0 = 6.0924e-04
Loss = 1.0403e-01, PNorm = 56.9737, GNorm = 1.0166, lr_0 = 6.0883e-04
Loss = 1.1139e-01, PNorm = 56.9899, GNorm = 0.7226, lr_0 = 6.0841e-04
Loss = 8.9992e-02, PNorm = 57.0069, GNorm = 0.6433, lr_0 = 6.0799e-04
Loss = 9.8754e-02, PNorm = 57.0218, GNorm = 1.4075, lr_0 = 6.0758e-04
Loss = 1.0251e-01, PNorm = 57.0346, GNorm = 0.5747, lr_0 = 6.0716e-04
Loss = 9.2927e-02, PNorm = 57.0490, GNorm = 0.6580, lr_0 = 6.0674e-04
Loss = 1.1017e-01, PNorm = 57.0629, GNorm = 0.5302, lr_0 = 6.0633e-04
Loss = 1.1054e-01, PNorm = 57.0842, GNorm = 0.6425, lr_0 = 6.0591e-04
Loss = 1.0455e-01, PNorm = 57.1026, GNorm = 0.6043, lr_0 = 6.0550e-04
Loss = 1.0116e-01, PNorm = 57.1171, GNorm = 0.9800, lr_0 = 6.0508e-04
Loss = 1.1683e-01, PNorm = 57.1302, GNorm = 0.5430, lr_0 = 6.0467e-04
Loss = 1.3597e-01, PNorm = 57.1452, GNorm = 1.7744, lr_0 = 6.0425e-04
Loss = 9.5989e-02, PNorm = 57.1664, GNorm = 0.5498, lr_0 = 6.0384e-04
Loss = 1.1488e-01, PNorm = 57.1861, GNorm = 1.0039, lr_0 = 6.0343e-04
Loss = 1.0153e-01, PNorm = 57.2014, GNorm = 0.7787, lr_0 = 6.0301e-04
Loss = 1.1313e-01, PNorm = 57.2110, GNorm = 1.2168, lr_0 = 6.0260e-04
Loss = 1.0392e-01, PNorm = 57.2214, GNorm = 0.6841, lr_0 = 6.0219e-04
Loss = 1.0915e-01, PNorm = 57.2338, GNorm = 0.8405, lr_0 = 6.0178e-04
Loss = 1.0404e-01, PNorm = 57.2502, GNorm = 1.4784, lr_0 = 6.0136e-04
Loss = 1.1439e-01, PNorm = 57.2645, GNorm = 0.4285, lr_0 = 6.0095e-04
Loss = 9.7312e-02, PNorm = 57.2773, GNorm = 0.9380, lr_0 = 6.0054e-04
Loss = 1.0994e-01, PNorm = 57.2918, GNorm = 0.7019, lr_0 = 6.0013e-04
Loss = 1.1408e-01, PNorm = 57.3054, GNorm = 1.6476, lr_0 = 5.9972e-04
Loss = 1.1306e-01, PNorm = 57.3192, GNorm = 0.8097, lr_0 = 5.9931e-04
Loss = 1.3092e-01, PNorm = 57.3356, GNorm = 1.5240, lr_0 = 5.9890e-04
Loss = 1.2485e-01, PNorm = 57.3553, GNorm = 0.7082, lr_0 = 5.9849e-04
Loss = 1.1442e-01, PNorm = 57.3750, GNorm = 0.7129, lr_0 = 5.9808e-04
Loss = 1.0652e-01, PNorm = 57.3945, GNorm = 0.8797, lr_0 = 5.9767e-04
Loss = 1.0218e-01, PNorm = 57.4126, GNorm = 0.7415, lr_0 = 5.9726e-04
Loss = 1.1585e-01, PNorm = 57.4300, GNorm = 0.9209, lr_0 = 5.9685e-04
Loss = 1.2047e-01, PNorm = 57.4416, GNorm = 0.6810, lr_0 = 5.9644e-04
Loss = 1.0990e-01, PNorm = 57.4509, GNorm = 0.8381, lr_0 = 5.9603e-04
Loss = 1.0710e-01, PNorm = 57.4638, GNorm = 0.6541, lr_0 = 5.9562e-04
Loss = 9.9916e-02, PNorm = 57.4825, GNorm = 0.8493, lr_0 = 5.9521e-04
Loss = 9.9062e-02, PNorm = 57.4951, GNorm = 0.6789, lr_0 = 5.9481e-04
Loss = 1.0400e-01, PNorm = 57.5099, GNorm = 0.7265, lr_0 = 5.9440e-04
Loss = 1.1494e-01, PNorm = 57.5280, GNorm = 0.5221, lr_0 = 5.9399e-04
Loss = 1.3576e-01, PNorm = 57.5478, GNorm = 0.5997, lr_0 = 5.9358e-04
Loss = 1.3364e-01, PNorm = 57.5643, GNorm = 1.6586, lr_0 = 5.9318e-04
Loss = 1.0544e-01, PNorm = 57.5853, GNorm = 0.9010, lr_0 = 5.9277e-04
Loss = 1.0305e-01, PNorm = 57.6015, GNorm = 0.7840, lr_0 = 5.9236e-04
Loss = 1.0917e-01, PNorm = 57.6156, GNorm = 1.0457, lr_0 = 5.9196e-04
Loss = 1.0788e-01, PNorm = 57.6399, GNorm = 0.7451, lr_0 = 5.9155e-04
Loss = 1.0907e-01, PNorm = 57.6555, GNorm = 1.2115, lr_0 = 5.9115e-04
Loss = 1.0304e-01, PNorm = 57.6667, GNorm = 0.8787, lr_0 = 5.9074e-04
Loss = 9.4533e-02, PNorm = 57.6772, GNorm = 0.6897, lr_0 = 5.9034e-04
Loss = 9.3148e-02, PNorm = 57.6897, GNorm = 0.6169, lr_0 = 5.8993e-04
Loss = 9.4903e-02, PNorm = 57.7035, GNorm = 0.6085, lr_0 = 5.8953e-04
Loss = 1.1122e-01, PNorm = 57.7174, GNorm = 0.7314, lr_0 = 5.8913e-04
Loss = 1.0925e-01, PNorm = 57.7319, GNorm = 1.6544, lr_0 = 5.8872e-04
Loss = 1.1507e-01, PNorm = 57.7486, GNorm = 1.0223, lr_0 = 5.8832e-04
Loss = 1.1333e-01, PNorm = 57.7610, GNorm = 0.9612, lr_0 = 5.8792e-04
Loss = 1.1473e-01, PNorm = 57.7745, GNorm = 0.6173, lr_0 = 5.8751e-04
Loss = 1.0236e-01, PNorm = 57.7888, GNorm = 0.6961, lr_0 = 5.8711e-04
Loss = 1.0126e-01, PNorm = 57.8018, GNorm = 0.9655, lr_0 = 5.8671e-04
Loss = 1.0931e-01, PNorm = 57.8194, GNorm = 1.5110, lr_0 = 5.8631e-04
Loss = 1.0933e-01, PNorm = 57.8372, GNorm = 0.9009, lr_0 = 5.8591e-04
Loss = 1.0656e-01, PNorm = 57.8543, GNorm = 0.5168, lr_0 = 5.8550e-04
Loss = 1.0492e-01, PNorm = 57.8735, GNorm = 0.5507, lr_0 = 5.8510e-04
Loss = 9.8052e-02, PNorm = 57.8882, GNorm = 0.7085, lr_0 = 5.8470e-04
Loss = 1.0624e-01, PNorm = 57.9014, GNorm = 0.5059, lr_0 = 5.8430e-04
Loss = 1.0990e-01, PNorm = 57.9173, GNorm = 0.7120, lr_0 = 5.8390e-04
Loss = 1.0933e-01, PNorm = 57.9313, GNorm = 1.0184, lr_0 = 5.8350e-04
Loss = 1.2221e-01, PNorm = 57.9451, GNorm = 0.6538, lr_0 = 5.8310e-04
Loss = 1.1117e-01, PNorm = 57.9608, GNorm = 0.9536, lr_0 = 5.8270e-04
Loss = 1.1533e-01, PNorm = 57.9815, GNorm = 1.0417, lr_0 = 5.8230e-04
Loss = 1.0974e-01, PNorm = 57.9931, GNorm = 0.6581, lr_0 = 5.8190e-04
Loss = 1.1942e-01, PNorm = 58.0025, GNorm = 0.6993, lr_0 = 5.8151e-04
Loss = 1.0220e-01, PNorm = 58.0153, GNorm = 0.5242, lr_0 = 5.8111e-04
Loss = 1.0142e-01, PNorm = 58.0262, GNorm = 0.5073, lr_0 = 5.8071e-04
Loss = 1.0839e-01, PNorm = 58.0416, GNorm = 0.6108, lr_0 = 5.8031e-04
Loss = 1.0493e-01, PNorm = 58.0559, GNorm = 0.8410, lr_0 = 5.7991e-04
Loss = 1.1396e-01, PNorm = 58.0691, GNorm = 0.8450, lr_0 = 5.7952e-04
Loss = 9.7462e-02, PNorm = 58.0817, GNorm = 0.6904, lr_0 = 5.7912e-04
Loss = 1.2322e-01, PNorm = 58.0948, GNorm = 1.2353, lr_0 = 5.7872e-04
Loss = 1.1742e-01, PNorm = 58.1081, GNorm = 2.1835, lr_0 = 5.7833e-04
Loss = 1.1157e-01, PNorm = 58.1279, GNorm = 1.2375, lr_0 = 5.7793e-04
Loss = 1.0507e-01, PNorm = 58.1455, GNorm = 1.4858, lr_0 = 5.7753e-04
Loss = 1.0736e-01, PNorm = 58.1602, GNorm = 0.9025, lr_0 = 5.7714e-04
Loss = 1.2148e-01, PNorm = 58.1713, GNorm = 1.0013, lr_0 = 5.7674e-04
Loss = 1.0940e-01, PNorm = 58.1889, GNorm = 0.7239, lr_0 = 5.7635e-04
Loss = 1.1764e-01, PNorm = 58.2067, GNorm = 0.7616, lr_0 = 5.7595e-04
Loss = 1.0829e-01, PNorm = 58.2206, GNorm = 0.6713, lr_0 = 5.7556e-04
Loss = 1.1322e-01, PNorm = 58.2404, GNorm = 1.1845, lr_0 = 5.7516e-04
Loss = 1.1123e-01, PNorm = 58.2556, GNorm = 0.8079, lr_0 = 5.7477e-04
Loss = 1.0922e-01, PNorm = 58.2690, GNorm = 0.4947, lr_0 = 5.7438e-04
Loss = 1.0780e-01, PNorm = 58.2861, GNorm = 0.5174, lr_0 = 5.7398e-04
Loss = 1.1419e-01, PNorm = 58.2982, GNorm = 0.5106, lr_0 = 5.7359e-04
Loss = 1.0551e-01, PNorm = 58.3041, GNorm = 0.8179, lr_0 = 5.7320e-04
Loss = 1.0342e-01, PNorm = 58.3177, GNorm = 0.6298, lr_0 = 5.7280e-04
Loss = 1.3184e-01, PNorm = 58.3322, GNorm = 1.2466, lr_0 = 5.7241e-04
Loss = 9.8157e-02, PNorm = 58.3470, GNorm = 0.8903, lr_0 = 5.7202e-04
Loss = 1.1154e-01, PNorm = 58.3611, GNorm = 0.5767, lr_0 = 5.7163e-04
Loss = 9.9343e-02, PNorm = 58.3771, GNorm = 0.9543, lr_0 = 5.7124e-04
Loss = 9.2727e-02, PNorm = 58.3889, GNorm = 0.7546, lr_0 = 5.7084e-04
Loss = 1.0935e-01, PNorm = 58.3976, GNorm = 0.5922, lr_0 = 5.7045e-04
Loss = 9.8507e-02, PNorm = 58.4088, GNorm = 1.0801, lr_0 = 5.7006e-04
Loss = 1.1532e-01, PNorm = 58.4250, GNorm = 1.6444, lr_0 = 5.6967e-04
Loss = 1.0695e-01, PNorm = 58.4400, GNorm = 0.6388, lr_0 = 5.6928e-04
Loss = 1.2340e-01, PNorm = 58.4516, GNorm = 1.1512, lr_0 = 5.6889e-04
Loss = 9.9115e-02, PNorm = 58.4623, GNorm = 0.9182, lr_0 = 5.6850e-04
Loss = 1.0522e-01, PNorm = 58.4790, GNorm = 0.7542, lr_0 = 5.6811e-04
Loss = 1.0557e-01, PNorm = 58.4942, GNorm = 0.7141, lr_0 = 5.6772e-04
Loss = 9.8396e-02, PNorm = 58.5078, GNorm = 0.4947, lr_0 = 5.6733e-04
Loss = 8.9817e-02, PNorm = 58.5155, GNorm = 0.5673, lr_0 = 5.6695e-04
Loss = 9.5476e-02, PNorm = 58.5268, GNorm = 0.7765, lr_0 = 5.6656e-04
Loss = 9.6258e-02, PNorm = 58.5384, GNorm = 1.2868, lr_0 = 5.6617e-04
Loss = 1.1653e-01, PNorm = 58.5488, GNorm = 1.4957, lr_0 = 5.6578e-04
Loss = 1.2065e-01, PNorm = 58.5623, GNorm = 1.2061, lr_0 = 5.6539e-04
Loss = 1.1236e-01, PNorm = 58.5753, GNorm = 0.6893, lr_0 = 5.6501e-04
Loss = 9.5937e-02, PNorm = 58.5929, GNorm = 0.5112, lr_0 = 5.6462e-04
Loss = 1.0197e-01, PNorm = 58.6118, GNorm = 0.8862, lr_0 = 5.6423e-04
Loss = 1.1021e-01, PNorm = 58.6266, GNorm = 0.9185, lr_0 = 5.6385e-04
Loss = 1.0053e-01, PNorm = 58.6392, GNorm = 0.8197, lr_0 = 5.6346e-04
Loss = 1.2583e-01, PNorm = 58.6543, GNorm = 0.8210, lr_0 = 5.6307e-04
Loss = 9.8761e-02, PNorm = 58.6653, GNorm = 0.7247, lr_0 = 5.6269e-04
Loss = 1.2856e-01, PNorm = 58.6793, GNorm = 1.0827, lr_0 = 5.6230e-04
Validation mae = 0.434768
Epoch 9
Loss = 1.0887e-01, PNorm = 58.6958, GNorm = 0.4289, lr_0 = 5.6192e-04
Loss = 9.5277e-02, PNorm = 58.7159, GNorm = 1.0336, lr_0 = 5.6153e-04
Loss = 1.0458e-01, PNorm = 58.7372, GNorm = 1.3800, lr_0 = 5.6115e-04
Loss = 8.9936e-02, PNorm = 58.7518, GNorm = 0.8792, lr_0 = 5.6076e-04
Loss = 1.1059e-01, PNorm = 58.7675, GNorm = 0.7126, lr_0 = 5.6038e-04
Loss = 9.2202e-02, PNorm = 58.7861, GNorm = 1.1056, lr_0 = 5.6000e-04
Loss = 1.1452e-01, PNorm = 58.7976, GNorm = 1.3015, lr_0 = 5.5961e-04
Loss = 1.0220e-01, PNorm = 58.8094, GNorm = 0.7608, lr_0 = 5.5923e-04
Loss = 9.9396e-02, PNorm = 58.8241, GNorm = 0.8714, lr_0 = 5.5885e-04
Loss = 1.0101e-01, PNorm = 58.8402, GNorm = 0.4736, lr_0 = 5.5846e-04
Loss = 9.9856e-02, PNorm = 58.8575, GNorm = 0.5442, lr_0 = 5.5808e-04
Loss = 9.6755e-02, PNorm = 58.8740, GNorm = 0.6248, lr_0 = 5.5770e-04
Loss = 8.9637e-02, PNorm = 58.8836, GNorm = 0.5382, lr_0 = 5.5732e-04
Loss = 9.9686e-02, PNorm = 58.8914, GNorm = 0.6304, lr_0 = 5.5693e-04
Loss = 7.9720e-02, PNorm = 58.9011, GNorm = 0.4838, lr_0 = 5.5655e-04
Loss = 9.6834e-02, PNorm = 58.9138, GNorm = 0.8649, lr_0 = 5.5617e-04
Loss = 1.0786e-01, PNorm = 58.9268, GNorm = 0.7435, lr_0 = 5.5579e-04
Loss = 1.0027e-01, PNorm = 58.9413, GNorm = 0.5395, lr_0 = 5.5541e-04
Loss = 9.3240e-02, PNorm = 58.9520, GNorm = 0.8157, lr_0 = 5.5503e-04
Loss = 8.9607e-02, PNorm = 58.9602, GNorm = 0.9483, lr_0 = 5.5465e-04
Loss = 9.8557e-02, PNorm = 58.9753, GNorm = 0.5655, lr_0 = 5.5427e-04
Loss = 1.0824e-01, PNorm = 58.9952, GNorm = 0.8851, lr_0 = 5.5389e-04
Loss = 1.0947e-01, PNorm = 59.0100, GNorm = 0.7500, lr_0 = 5.5351e-04
Loss = 1.0243e-01, PNorm = 59.0231, GNorm = 0.5497, lr_0 = 5.5313e-04
Loss = 1.0106e-01, PNorm = 59.0378, GNorm = 0.6898, lr_0 = 5.5275e-04
Loss = 9.3467e-02, PNorm = 59.0513, GNorm = 0.5243, lr_0 = 5.5237e-04
Loss = 1.0288e-01, PNorm = 59.0608, GNorm = 0.5857, lr_0 = 5.5199e-04
Loss = 1.1279e-01, PNorm = 59.0743, GNorm = 1.2798, lr_0 = 5.5162e-04
Loss = 1.0307e-01, PNorm = 59.0889, GNorm = 1.3326, lr_0 = 5.5124e-04
Loss = 8.7926e-02, PNorm = 59.1046, GNorm = 0.5665, lr_0 = 5.5086e-04
Loss = 1.1103e-01, PNorm = 59.1185, GNorm = 2.0285, lr_0 = 5.5048e-04
Loss = 9.8014e-02, PNorm = 59.1344, GNorm = 1.4616, lr_0 = 5.5011e-04
Loss = 9.6841e-02, PNorm = 59.1520, GNorm = 0.6572, lr_0 = 5.4973e-04
Loss = 9.6521e-02, PNorm = 59.1691, GNorm = 0.6916, lr_0 = 5.4935e-04
Loss = 9.7886e-02, PNorm = 59.1838, GNorm = 0.7243, lr_0 = 5.4898e-04
Loss = 1.0890e-01, PNorm = 59.1906, GNorm = 0.8138, lr_0 = 5.4860e-04
Loss = 1.0027e-01, PNorm = 59.1958, GNorm = 0.5732, lr_0 = 5.4822e-04
Loss = 1.0053e-01, PNorm = 59.2054, GNorm = 0.5491, lr_0 = 5.4785e-04
Loss = 8.6376e-02, PNorm = 59.2221, GNorm = 0.9066, lr_0 = 5.4747e-04
Loss = 9.9646e-02, PNorm = 59.2368, GNorm = 1.0869, lr_0 = 5.4710e-04
Loss = 1.1464e-01, PNorm = 59.2500, GNorm = 0.8490, lr_0 = 5.4672e-04
Loss = 1.0516e-01, PNorm = 59.2647, GNorm = 1.5129, lr_0 = 5.4635e-04
Loss = 1.0777e-01, PNorm = 59.2843, GNorm = 0.9837, lr_0 = 5.4597e-04
Loss = 1.1216e-01, PNorm = 59.3019, GNorm = 1.0419, lr_0 = 5.4560e-04
Loss = 9.0794e-02, PNorm = 59.3107, GNorm = 0.6534, lr_0 = 5.4523e-04
Loss = 9.3725e-02, PNorm = 59.3182, GNorm = 0.8524, lr_0 = 5.4485e-04
Loss = 8.6635e-02, PNorm = 59.3290, GNorm = 0.6845, lr_0 = 5.4448e-04
Loss = 9.1358e-02, PNorm = 59.3384, GNorm = 0.9097, lr_0 = 5.4411e-04
Loss = 1.0668e-01, PNorm = 59.3447, GNorm = 1.1600, lr_0 = 5.4373e-04
Loss = 1.0378e-01, PNorm = 59.3560, GNorm = 0.5240, lr_0 = 5.4336e-04
Loss = 1.1159e-01, PNorm = 59.3701, GNorm = 0.7719, lr_0 = 5.4299e-04
Loss = 9.5876e-02, PNorm = 59.3843, GNorm = 0.6840, lr_0 = 5.4262e-04
Loss = 9.6761e-02, PNorm = 59.3977, GNorm = 0.6322, lr_0 = 5.4225e-04
Loss = 1.0177e-01, PNorm = 59.4092, GNorm = 0.6379, lr_0 = 5.4187e-04
Loss = 9.9547e-02, PNorm = 59.4259, GNorm = 1.0104, lr_0 = 5.4150e-04
Loss = 8.8391e-02, PNorm = 59.4401, GNorm = 0.4978, lr_0 = 5.4113e-04
Loss = 8.5024e-02, PNorm = 59.4502, GNorm = 0.5900, lr_0 = 5.4076e-04
Loss = 9.6030e-02, PNorm = 59.4612, GNorm = 0.9380, lr_0 = 5.4039e-04
Loss = 9.5468e-02, PNorm = 59.4744, GNorm = 1.0323, lr_0 = 5.4002e-04
Loss = 9.6131e-02, PNorm = 59.4903, GNorm = 0.4826, lr_0 = 5.3965e-04
Loss = 1.0139e-01, PNorm = 59.5041, GNorm = 1.0093, lr_0 = 5.3928e-04
Loss = 9.6861e-02, PNorm = 59.5159, GNorm = 1.1819, lr_0 = 5.3891e-04
Loss = 1.0325e-01, PNorm = 59.5296, GNorm = 1.0322, lr_0 = 5.3854e-04
Loss = 1.2958e-01, PNorm = 59.5494, GNorm = 0.9036, lr_0 = 5.3817e-04
Loss = 1.1560e-01, PNorm = 59.5652, GNorm = 1.6244, lr_0 = 5.3781e-04
Loss = 1.0695e-01, PNorm = 59.5800, GNorm = 0.8304, lr_0 = 5.3744e-04
Loss = 1.0301e-01, PNorm = 59.5887, GNorm = 0.8307, lr_0 = 5.3707e-04
Loss = 9.8367e-02, PNorm = 59.6006, GNorm = 0.6708, lr_0 = 5.3670e-04
Loss = 9.9656e-02, PNorm = 59.6165, GNorm = 0.7103, lr_0 = 5.3633e-04
Loss = 1.0675e-01, PNorm = 59.6309, GNorm = 0.5132, lr_0 = 5.3597e-04
Loss = 1.0481e-01, PNorm = 59.6470, GNorm = 0.7448, lr_0 = 5.3560e-04
Loss = 9.2414e-02, PNorm = 59.6602, GNorm = 0.9867, lr_0 = 5.3523e-04
Loss = 9.8074e-02, PNorm = 59.6765, GNorm = 0.7591, lr_0 = 5.3486e-04
Loss = 9.8159e-02, PNorm = 59.6892, GNorm = 0.6938, lr_0 = 5.3450e-04
Loss = 9.3206e-02, PNorm = 59.7000, GNorm = 1.3383, lr_0 = 5.3413e-04
Loss = 9.2957e-02, PNorm = 59.7146, GNorm = 0.7343, lr_0 = 5.3377e-04
Loss = 1.0435e-01, PNorm = 59.7304, GNorm = 0.7761, lr_0 = 5.3340e-04
Loss = 1.0012e-01, PNorm = 59.7435, GNorm = 0.7273, lr_0 = 5.3304e-04
Loss = 9.4892e-02, PNorm = 59.7477, GNorm = 0.8774, lr_0 = 5.3267e-04
Loss = 9.7983e-02, PNorm = 59.7568, GNorm = 0.6156, lr_0 = 5.3231e-04
Loss = 9.3634e-02, PNorm = 59.7746, GNorm = 0.6680, lr_0 = 5.3194e-04
Loss = 1.0001e-01, PNorm = 59.7931, GNorm = 1.0345, lr_0 = 5.3158e-04
Loss = 1.0003e-01, PNorm = 59.8089, GNorm = 0.6147, lr_0 = 5.3121e-04
Loss = 9.0010e-02, PNorm = 59.8190, GNorm = 0.8021, lr_0 = 5.3085e-04
Loss = 1.0971e-01, PNorm = 59.8293, GNorm = 0.6122, lr_0 = 5.3048e-04
Loss = 9.8711e-02, PNorm = 59.8404, GNorm = 0.5997, lr_0 = 5.3012e-04
Loss = 1.0933e-01, PNorm = 59.8542, GNorm = 0.9453, lr_0 = 5.2976e-04
Loss = 1.4423e-01, PNorm = 59.8754, GNorm = 0.8480, lr_0 = 5.2939e-04
Loss = 1.1758e-01, PNorm = 59.8948, GNorm = 0.7928, lr_0 = 5.2903e-04
Loss = 1.0245e-01, PNorm = 59.9081, GNorm = 1.0633, lr_0 = 5.2867e-04
Loss = 1.1101e-01, PNorm = 59.9238, GNorm = 1.0103, lr_0 = 5.2831e-04
Loss = 1.1191e-01, PNorm = 59.9399, GNorm = 0.6893, lr_0 = 5.2795e-04
Loss = 9.4304e-02, PNorm = 59.9543, GNorm = 1.2801, lr_0 = 5.2758e-04
Loss = 9.0653e-02, PNorm = 59.9681, GNorm = 1.5801, lr_0 = 5.2722e-04
Loss = 7.7893e-02, PNorm = 59.9819, GNorm = 0.5025, lr_0 = 5.2686e-04
Loss = 9.6226e-02, PNorm = 59.9935, GNorm = 1.0146, lr_0 = 5.2650e-04
Loss = 8.9694e-02, PNorm = 60.0020, GNorm = 0.9441, lr_0 = 5.2614e-04
Loss = 1.0453e-01, PNorm = 60.0128, GNorm = 0.6707, lr_0 = 5.2578e-04
Loss = 1.0815e-01, PNorm = 60.0230, GNorm = 1.3683, lr_0 = 5.2542e-04
Loss = 9.7582e-02, PNorm = 60.0351, GNorm = 1.7005, lr_0 = 5.2506e-04
Loss = 1.1985e-01, PNorm = 60.0480, GNorm = 1.0962, lr_0 = 5.2470e-04
Loss = 9.6740e-02, PNorm = 60.0652, GNorm = 0.5976, lr_0 = 5.2434e-04
Loss = 8.5402e-02, PNorm = 60.0802, GNorm = 0.4885, lr_0 = 5.2398e-04
Loss = 1.1222e-01, PNorm = 60.0951, GNorm = 0.6240, lr_0 = 5.2362e-04
Loss = 9.3938e-02, PNorm = 60.1078, GNorm = 1.2589, lr_0 = 5.2326e-04
Loss = 1.0201e-01, PNorm = 60.1203, GNorm = 0.4509, lr_0 = 5.2290e-04
Loss = 1.0765e-01, PNorm = 60.1299, GNorm = 0.5375, lr_0 = 5.2255e-04
Loss = 1.0755e-01, PNorm = 60.1419, GNorm = 0.6222, lr_0 = 5.2219e-04
Loss = 1.1235e-01, PNorm = 60.1556, GNorm = 0.9237, lr_0 = 5.2183e-04
Loss = 9.6779e-02, PNorm = 60.1676, GNorm = 0.5551, lr_0 = 5.2147e-04
Loss = 1.0593e-01, PNorm = 60.1778, GNorm = 1.0243, lr_0 = 5.2112e-04
Loss = 1.0180e-01, PNorm = 60.1847, GNorm = 0.7379, lr_0 = 5.2076e-04
Loss = 1.0080e-01, PNorm = 60.1967, GNorm = 0.5904, lr_0 = 5.2040e-04
Loss = 1.0218e-01, PNorm = 60.2121, GNorm = 0.5253, lr_0 = 5.2005e-04
Loss = 9.1621e-02, PNorm = 60.2199, GNorm = 0.7055, lr_0 = 5.1969e-04
Loss = 9.5943e-02, PNorm = 60.2290, GNorm = 0.6732, lr_0 = 5.1933e-04
Loss = 1.0228e-01, PNorm = 60.2396, GNorm = 0.6520, lr_0 = 5.1898e-04
Loss = 9.9718e-02, PNorm = 60.2506, GNorm = 0.7095, lr_0 = 5.1862e-04
Loss = 9.4415e-02, PNorm = 60.2618, GNorm = 0.5082, lr_0 = 5.1827e-04
Loss = 9.1303e-02, PNorm = 60.2733, GNorm = 0.7129, lr_0 = 5.1791e-04
Validation mae = 0.418950
Epoch 10
Loss = 9.5440e-02, PNorm = 60.2911, GNorm = 0.7937, lr_0 = 5.1756e-04
Loss = 8.6598e-02, PNorm = 60.3090, GNorm = 0.7621, lr_0 = 5.1720e-04
Loss = 8.9485e-02, PNorm = 60.3255, GNorm = 0.5906, lr_0 = 5.1685e-04
Loss = 8.1300e-02, PNorm = 60.3415, GNorm = 0.4540, lr_0 = 5.1649e-04
Loss = 9.0051e-02, PNorm = 60.3529, GNorm = 0.5638, lr_0 = 5.1614e-04
Loss = 9.4553e-02, PNorm = 60.3657, GNorm = 0.5596, lr_0 = 5.1579e-04
Loss = 8.6060e-02, PNorm = 60.3799, GNorm = 0.7943, lr_0 = 5.1543e-04
Loss = 9.0995e-02, PNorm = 60.3910, GNorm = 0.8629, lr_0 = 5.1508e-04
Loss = 9.2954e-02, PNorm = 60.4002, GNorm = 1.1924, lr_0 = 5.1473e-04
Loss = 9.3245e-02, PNorm = 60.4131, GNorm = 0.5723, lr_0 = 5.1437e-04
Loss = 9.1088e-02, PNorm = 60.4236, GNorm = 0.8110, lr_0 = 5.1402e-04
Loss = 9.4972e-02, PNorm = 60.4343, GNorm = 0.9683, lr_0 = 5.1367e-04
Loss = 9.1579e-02, PNorm = 60.4475, GNorm = 0.6245, lr_0 = 5.1332e-04
Loss = 1.0877e-01, PNorm = 60.4662, GNorm = 0.7248, lr_0 = 5.1297e-04
Loss = 1.1197e-01, PNorm = 60.4807, GNorm = 0.9394, lr_0 = 5.1262e-04
Loss = 9.5847e-02, PNorm = 60.4933, GNorm = 0.6679, lr_0 = 5.1226e-04
Loss = 1.0091e-01, PNorm = 60.5056, GNorm = 0.5419, lr_0 = 5.1191e-04
Loss = 8.3052e-02, PNorm = 60.5211, GNorm = 0.7904, lr_0 = 5.1156e-04
Loss = 7.6172e-02, PNorm = 60.5346, GNorm = 0.5103, lr_0 = 5.1121e-04
Loss = 8.6899e-02, PNorm = 60.5467, GNorm = 0.5325, lr_0 = 5.1086e-04
Loss = 9.1634e-02, PNorm = 60.5646, GNorm = 1.2652, lr_0 = 5.1051e-04
Loss = 8.4820e-02, PNorm = 60.5829, GNorm = 0.7342, lr_0 = 5.1016e-04
Loss = 9.5331e-02, PNorm = 60.5933, GNorm = 1.2114, lr_0 = 5.0981e-04
Loss = 8.5776e-02, PNorm = 60.6036, GNorm = 0.6855, lr_0 = 5.0946e-04
Loss = 9.3762e-02, PNorm = 60.6195, GNorm = 1.2699, lr_0 = 5.0911e-04
Loss = 9.1742e-02, PNorm = 60.6334, GNorm = 0.7402, lr_0 = 5.0877e-04
Loss = 9.5967e-02, PNorm = 60.6440, GNorm = 0.7011, lr_0 = 5.0842e-04
Loss = 9.1937e-02, PNorm = 60.6549, GNorm = 0.7247, lr_0 = 5.0807e-04
Loss = 9.7796e-02, PNorm = 60.6687, GNorm = 0.7291, lr_0 = 5.0772e-04
Loss = 8.6581e-02, PNorm = 60.6832, GNorm = 0.7817, lr_0 = 5.0737e-04
Loss = 1.0048e-01, PNorm = 60.6938, GNorm = 0.4747, lr_0 = 5.0703e-04
Loss = 9.0169e-02, PNorm = 60.7090, GNorm = 0.6835, lr_0 = 5.0668e-04
Loss = 9.7028e-02, PNorm = 60.7251, GNorm = 0.5081, lr_0 = 5.0633e-04
Loss = 9.9588e-02, PNorm = 60.7376, GNorm = 0.7510, lr_0 = 5.0598e-04
Loss = 9.8151e-02, PNorm = 60.7535, GNorm = 0.7490, lr_0 = 5.0564e-04
Loss = 8.9305e-02, PNorm = 60.7691, GNorm = 0.5723, lr_0 = 5.0529e-04
Loss = 9.3752e-02, PNorm = 60.7800, GNorm = 0.5399, lr_0 = 5.0494e-04
Loss = 8.7478e-02, PNorm = 60.7922, GNorm = 0.5840, lr_0 = 5.0460e-04
Loss = 8.3566e-02, PNorm = 60.8044, GNorm = 0.6953, lr_0 = 5.0425e-04
Loss = 8.2895e-02, PNorm = 60.8122, GNorm = 0.6509, lr_0 = 5.0391e-04
Loss = 8.7362e-02, PNorm = 60.8184, GNorm = 0.7140, lr_0 = 5.0356e-04
Loss = 9.5177e-02, PNorm = 60.8326, GNorm = 0.5275, lr_0 = 5.0322e-04
Loss = 8.5218e-02, PNorm = 60.8443, GNorm = 0.7160, lr_0 = 5.0287e-04
Loss = 8.1501e-02, PNorm = 60.8559, GNorm = 0.4178, lr_0 = 5.0253e-04
Loss = 9.0040e-02, PNorm = 60.8670, GNorm = 0.5177, lr_0 = 5.0218e-04
Loss = 8.7412e-02, PNorm = 60.8807, GNorm = 0.5442, lr_0 = 5.0184e-04
Loss = 8.2773e-02, PNorm = 60.8940, GNorm = 0.6104, lr_0 = 5.0150e-04
Loss = 7.9233e-02, PNorm = 60.9069, GNorm = 0.8143, lr_0 = 5.0115e-04
Loss = 9.7671e-02, PNorm = 60.9190, GNorm = 0.5467, lr_0 = 5.0081e-04
Loss = 9.7387e-02, PNorm = 60.9252, GNorm = 0.8913, lr_0 = 5.0047e-04
Loss = 8.5495e-02, PNorm = 60.9317, GNorm = 0.6988, lr_0 = 5.0012e-04
Loss = 8.7574e-02, PNorm = 60.9401, GNorm = 0.7645, lr_0 = 4.9978e-04
Loss = 8.8759e-02, PNorm = 60.9487, GNorm = 1.4346, lr_0 = 4.9944e-04
Loss = 9.2049e-02, PNorm = 60.9549, GNorm = 0.7584, lr_0 = 4.9910e-04
Loss = 8.9796e-02, PNorm = 60.9641, GNorm = 0.5563, lr_0 = 4.9875e-04
Loss = 8.9953e-02, PNorm = 60.9752, GNorm = 0.4429, lr_0 = 4.9841e-04
Loss = 9.4798e-02, PNorm = 60.9876, GNorm = 1.0768, lr_0 = 4.9807e-04
Loss = 9.0031e-02, PNorm = 60.9957, GNorm = 0.7141, lr_0 = 4.9773e-04
Loss = 9.5060e-02, PNorm = 61.0055, GNorm = 0.5376, lr_0 = 4.9739e-04
Loss = 7.5248e-02, PNorm = 61.0126, GNorm = 0.6058, lr_0 = 4.9705e-04
Loss = 9.6851e-02, PNorm = 61.0208, GNorm = 0.6695, lr_0 = 4.9671e-04
Loss = 8.7796e-02, PNorm = 61.0376, GNorm = 0.6420, lr_0 = 4.9637e-04
Loss = 8.0726e-02, PNorm = 61.0525, GNorm = 0.5369, lr_0 = 4.9603e-04
Loss = 1.0663e-01, PNorm = 61.0676, GNorm = 0.6913, lr_0 = 4.9569e-04
Loss = 8.4619e-02, PNorm = 61.0848, GNorm = 0.5084, lr_0 = 4.9535e-04
Loss = 9.5810e-02, PNorm = 61.0987, GNorm = 0.6534, lr_0 = 4.9501e-04
Loss = 9.3294e-02, PNorm = 61.1106, GNorm = 0.6436, lr_0 = 4.9467e-04
Loss = 9.0523e-02, PNorm = 61.1204, GNorm = 0.9475, lr_0 = 4.9433e-04
Loss = 9.9589e-02, PNorm = 61.1314, GNorm = 0.6140, lr_0 = 4.9399e-04
Loss = 9.2141e-02, PNorm = 61.1396, GNorm = 0.6823, lr_0 = 4.9365e-04
Loss = 9.8059e-02, PNorm = 61.1454, GNorm = 0.7345, lr_0 = 4.9332e-04
Loss = 8.1361e-02, PNorm = 61.1534, GNorm = 0.5260, lr_0 = 4.9298e-04
Loss = 9.8032e-02, PNorm = 61.1658, GNorm = 0.8178, lr_0 = 4.9264e-04
Loss = 1.1692e-01, PNorm = 61.1785, GNorm = 0.5758, lr_0 = 4.9230e-04
Loss = 9.5782e-02, PNorm = 61.1858, GNorm = 0.8085, lr_0 = 4.9197e-04
Loss = 1.0360e-01, PNorm = 61.1973, GNorm = 1.9639, lr_0 = 4.9163e-04
Loss = 9.8552e-02, PNorm = 61.2133, GNorm = 0.6910, lr_0 = 4.9129e-04
Loss = 9.8351e-02, PNorm = 61.2253, GNorm = 1.5948, lr_0 = 4.9095e-04
Loss = 9.6534e-02, PNorm = 61.2359, GNorm = 1.5851, lr_0 = 4.9062e-04
Loss = 9.3609e-02, PNorm = 61.2480, GNorm = 0.7654, lr_0 = 4.9028e-04
Loss = 1.0366e-01, PNorm = 61.2579, GNorm = 0.8189, lr_0 = 4.8995e-04
Loss = 1.1102e-01, PNorm = 61.2714, GNorm = 0.6455, lr_0 = 4.8961e-04
Loss = 9.0699e-02, PNorm = 61.2841, GNorm = 1.0253, lr_0 = 4.8928e-04
Loss = 9.1803e-02, PNorm = 61.2965, GNorm = 0.6152, lr_0 = 4.8894e-04
Loss = 8.9559e-02, PNorm = 61.3076, GNorm = 0.5440, lr_0 = 4.8861e-04
Loss = 8.9457e-02, PNorm = 61.3182, GNorm = 0.7067, lr_0 = 4.8827e-04
Loss = 8.5195e-02, PNorm = 61.3251, GNorm = 0.8093, lr_0 = 4.8794e-04
Loss = 1.0080e-01, PNorm = 61.3395, GNorm = 0.5642, lr_0 = 4.8760e-04
Loss = 9.4027e-02, PNorm = 61.3500, GNorm = 0.6254, lr_0 = 4.8727e-04
Loss = 7.7415e-02, PNorm = 61.3587, GNorm = 0.4039, lr_0 = 4.8693e-04
Loss = 9.7823e-02, PNorm = 61.3694, GNorm = 0.8943, lr_0 = 4.8660e-04
Loss = 9.3851e-02, PNorm = 61.3795, GNorm = 1.1959, lr_0 = 4.8627e-04
Loss = 9.3339e-02, PNorm = 61.3905, GNorm = 0.5440, lr_0 = 4.8593e-04
Loss = 9.4302e-02, PNorm = 61.3952, GNorm = 0.8177, lr_0 = 4.8560e-04
Loss = 9.6750e-02, PNorm = 61.4027, GNorm = 0.8316, lr_0 = 4.8527e-04
Loss = 1.0913e-01, PNorm = 61.4170, GNorm = 0.7303, lr_0 = 4.8494e-04
Loss = 9.6130e-02, PNorm = 61.4346, GNorm = 1.0270, lr_0 = 4.8460e-04
Loss = 8.7611e-02, PNorm = 61.4473, GNorm = 0.7489, lr_0 = 4.8427e-04
Loss = 9.7660e-02, PNorm = 61.4532, GNorm = 1.0481, lr_0 = 4.8394e-04
Loss = 9.3939e-02, PNorm = 61.4679, GNorm = 0.7368, lr_0 = 4.8361e-04
Loss = 8.6595e-02, PNorm = 61.4768, GNorm = 0.7803, lr_0 = 4.8328e-04
Loss = 8.7221e-02, PNorm = 61.4888, GNorm = 0.5015, lr_0 = 4.8295e-04
Loss = 9.1618e-02, PNorm = 61.4989, GNorm = 0.8279, lr_0 = 4.8262e-04
Loss = 8.8583e-02, PNorm = 61.5045, GNorm = 0.6474, lr_0 = 4.8228e-04
Loss = 9.1851e-02, PNorm = 61.5107, GNorm = 0.6098, lr_0 = 4.8195e-04
Loss = 8.8837e-02, PNorm = 61.5233, GNorm = 0.5284, lr_0 = 4.8162e-04
Loss = 8.7768e-02, PNorm = 61.5373, GNorm = 0.4107, lr_0 = 4.8129e-04
Loss = 1.0191e-01, PNorm = 61.5531, GNorm = 0.7362, lr_0 = 4.8096e-04
Loss = 1.0498e-01, PNorm = 61.5657, GNorm = 0.7990, lr_0 = 4.8064e-04
Loss = 9.4965e-02, PNorm = 61.5779, GNorm = 0.5469, lr_0 = 4.8031e-04
Loss = 9.1447e-02, PNorm = 61.5876, GNorm = 0.5421, lr_0 = 4.7998e-04
Loss = 8.5294e-02, PNorm = 61.5951, GNorm = 0.6672, lr_0 = 4.7965e-04
Loss = 9.0013e-02, PNorm = 61.6030, GNorm = 0.5291, lr_0 = 4.7932e-04
Loss = 9.7947e-02, PNorm = 61.6139, GNorm = 0.5457, lr_0 = 4.7899e-04
Loss = 8.5786e-02, PNorm = 61.6261, GNorm = 0.5022, lr_0 = 4.7866e-04
Loss = 8.7138e-02, PNorm = 61.6372, GNorm = 1.1478, lr_0 = 4.7833e-04
Loss = 8.6985e-02, PNorm = 61.6482, GNorm = 0.5697, lr_0 = 4.7801e-04
Loss = 9.7464e-02, PNorm = 61.6567, GNorm = 0.7545, lr_0 = 4.7768e-04
Loss = 9.7171e-02, PNorm = 61.6675, GNorm = 1.0253, lr_0 = 4.7735e-04
Loss = 7.9287e-02, PNorm = 61.6782, GNorm = 0.8962, lr_0 = 4.7703e-04
Validation mae = 0.400163
Epoch 11
Loss = 7.0393e-02, PNorm = 61.6851, GNorm = 0.9203, lr_0 = 4.7670e-04
Loss = 9.0530e-02, PNorm = 61.6954, GNorm = 0.5930, lr_0 = 4.7637e-04
Loss = 7.2355e-02, PNorm = 61.7086, GNorm = 0.3869, lr_0 = 4.7605e-04
Loss = 7.4889e-02, PNorm = 61.7226, GNorm = 0.7695, lr_0 = 4.7572e-04
Loss = 8.0411e-02, PNorm = 61.7371, GNorm = 0.6099, lr_0 = 4.7539e-04
Loss = 8.6865e-02, PNorm = 61.7518, GNorm = 1.0738, lr_0 = 4.7507e-04
Loss = 7.1233e-02, PNorm = 61.7629, GNorm = 1.0191, lr_0 = 4.7474e-04
Loss = 8.4856e-02, PNorm = 61.7778, GNorm = 0.5188, lr_0 = 4.7442e-04
Loss = 9.6048e-02, PNorm = 61.7909, GNorm = 0.5820, lr_0 = 4.7409e-04
Loss = 8.4732e-02, PNorm = 61.8022, GNorm = 0.6485, lr_0 = 4.7377e-04
Loss = 9.5879e-02, PNorm = 61.8161, GNorm = 0.5005, lr_0 = 4.7344e-04
Loss = 8.5260e-02, PNorm = 61.8307, GNorm = 0.7788, lr_0 = 4.7312e-04
Loss = 8.0008e-02, PNorm = 61.8351, GNorm = 0.5768, lr_0 = 4.7279e-04
Loss = 8.7832e-02, PNorm = 61.8422, GNorm = 1.1521, lr_0 = 4.7247e-04
Loss = 8.8143e-02, PNorm = 61.8548, GNorm = 1.0527, lr_0 = 4.7215e-04
Loss = 8.3908e-02, PNorm = 61.8676, GNorm = 0.7360, lr_0 = 4.7182e-04
Loss = 8.2016e-02, PNorm = 61.8791, GNorm = 0.4499, lr_0 = 4.7150e-04
Loss = 8.3600e-02, PNorm = 61.8904, GNorm = 0.6691, lr_0 = 4.7118e-04
Loss = 8.1382e-02, PNorm = 61.8975, GNorm = 0.4531, lr_0 = 4.7085e-04
Loss = 7.7864e-02, PNorm = 61.9087, GNorm = 0.5475, lr_0 = 4.7053e-04
Loss = 7.2087e-02, PNorm = 61.9228, GNorm = 1.1419, lr_0 = 4.7021e-04
Loss = 7.5467e-02, PNorm = 61.9328, GNorm = 0.8251, lr_0 = 4.6989e-04
Loss = 8.1726e-02, PNorm = 61.9424, GNorm = 0.6321, lr_0 = 4.6957e-04
Loss = 7.9417e-02, PNorm = 61.9562, GNorm = 0.6543, lr_0 = 4.6924e-04
Loss = 7.6895e-02, PNorm = 61.9720, GNorm = 0.4878, lr_0 = 4.6892e-04
Loss = 9.2153e-02, PNorm = 61.9867, GNorm = 1.3945, lr_0 = 4.6860e-04
Loss = 9.5061e-02, PNorm = 61.9972, GNorm = 0.6356, lr_0 = 4.6828e-04
Loss = 7.4742e-02, PNorm = 62.0052, GNorm = 0.8478, lr_0 = 4.6796e-04
Loss = 9.6395e-02, PNorm = 62.0150, GNorm = 0.6906, lr_0 = 4.6764e-04
Loss = 8.8276e-02, PNorm = 62.0232, GNorm = 0.5452, lr_0 = 4.6732e-04
Loss = 8.9784e-02, PNorm = 62.0339, GNorm = 0.5599, lr_0 = 4.6700e-04
Loss = 8.5088e-02, PNorm = 62.0454, GNorm = 0.6057, lr_0 = 4.6668e-04
Loss = 8.6642e-02, PNorm = 62.0586, GNorm = 0.7350, lr_0 = 4.6636e-04
Loss = 9.2835e-02, PNorm = 62.0712, GNorm = 1.1517, lr_0 = 4.6604e-04
Loss = 8.4001e-02, PNorm = 62.0886, GNorm = 0.4951, lr_0 = 4.6572e-04
Loss = 8.4688e-02, PNorm = 62.1028, GNorm = 0.6623, lr_0 = 4.6540e-04
Loss = 7.5845e-02, PNorm = 62.1142, GNorm = 1.0185, lr_0 = 4.6508e-04
Loss = 8.7971e-02, PNorm = 62.1241, GNorm = 0.4956, lr_0 = 4.6476e-04
Loss = 1.0156e-01, PNorm = 62.1306, GNorm = 0.9405, lr_0 = 4.6445e-04
Loss = 8.3012e-02, PNorm = 62.1432, GNorm = 0.7489, lr_0 = 4.6413e-04
Loss = 7.8963e-02, PNorm = 62.1569, GNorm = 0.4507, lr_0 = 4.6381e-04
Loss = 7.9697e-02, PNorm = 62.1654, GNorm = 0.5973, lr_0 = 4.6349e-04
Loss = 8.5223e-02, PNorm = 62.1739, GNorm = 0.6931, lr_0 = 4.6317e-04
Loss = 8.3738e-02, PNorm = 62.1864, GNorm = 1.1140, lr_0 = 4.6286e-04
Loss = 9.2278e-02, PNorm = 62.2014, GNorm = 0.7019, lr_0 = 4.6254e-04
Loss = 8.6673e-02, PNorm = 62.2152, GNorm = 0.9475, lr_0 = 4.6222e-04
Loss = 8.3001e-02, PNorm = 62.2266, GNorm = 0.6049, lr_0 = 4.6191e-04
Loss = 9.9284e-02, PNorm = 62.2389, GNorm = 0.8195, lr_0 = 4.6159e-04
Loss = 7.7138e-02, PNorm = 62.2496, GNorm = 0.4792, lr_0 = 4.6127e-04
Loss = 8.4824e-02, PNorm = 62.2624, GNorm = 0.5774, lr_0 = 4.6096e-04
Loss = 7.6616e-02, PNorm = 62.2762, GNorm = 0.9595, lr_0 = 4.6064e-04
Loss = 8.6929e-02, PNorm = 62.2874, GNorm = 0.6545, lr_0 = 4.6033e-04
Loss = 9.0476e-02, PNorm = 62.2957, GNorm = 0.6968, lr_0 = 4.6001e-04
Loss = 7.0479e-02, PNorm = 62.3029, GNorm = 0.3796, lr_0 = 4.5970e-04
Loss = 8.3840e-02, PNorm = 62.3103, GNorm = 0.9746, lr_0 = 4.5938e-04
Loss = 9.1100e-02, PNorm = 62.3193, GNorm = 0.6959, lr_0 = 4.5907e-04
Loss = 9.2027e-02, PNorm = 62.3335, GNorm = 0.6613, lr_0 = 4.5875e-04
Loss = 8.8743e-02, PNorm = 62.3468, GNorm = 0.6719, lr_0 = 4.5844e-04
Loss = 8.5407e-02, PNorm = 62.3580, GNorm = 0.6159, lr_0 = 4.5812e-04
Loss = 8.9748e-02, PNorm = 62.3710, GNorm = 0.9947, lr_0 = 4.5781e-04
Loss = 9.7102e-02, PNorm = 62.3841, GNorm = 0.5159, lr_0 = 4.5750e-04
Loss = 9.5243e-02, PNorm = 62.4001, GNorm = 0.6536, lr_0 = 4.5718e-04
Loss = 7.7521e-02, PNorm = 62.4124, GNorm = 0.7958, lr_0 = 4.5687e-04
Loss = 8.7712e-02, PNorm = 62.4242, GNorm = 0.7157, lr_0 = 4.5656e-04
Loss = 9.6251e-02, PNorm = 62.4347, GNorm = 0.5236, lr_0 = 4.5624e-04
Loss = 9.4517e-02, PNorm = 62.4479, GNorm = 0.7492, lr_0 = 4.5593e-04
Loss = 9.1484e-02, PNorm = 62.4543, GNorm = 0.4360, lr_0 = 4.5562e-04
Loss = 7.4861e-02, PNorm = 62.4617, GNorm = 0.7599, lr_0 = 4.5531e-04
Loss = 8.1864e-02, PNorm = 62.4686, GNorm = 0.6153, lr_0 = 4.5499e-04
Loss = 8.9365e-02, PNorm = 62.4782, GNorm = 0.5792, lr_0 = 4.5468e-04
Loss = 9.2053e-02, PNorm = 62.4874, GNorm = 0.6637, lr_0 = 4.5437e-04
Loss = 8.4506e-02, PNorm = 62.4954, GNorm = 0.6788, lr_0 = 4.5406e-04
Loss = 7.7449e-02, PNorm = 62.5070, GNorm = 0.4913, lr_0 = 4.5375e-04
Loss = 1.0683e-01, PNorm = 62.5198, GNorm = 1.1854, lr_0 = 4.5344e-04
Loss = 8.6162e-02, PNorm = 62.5321, GNorm = 0.8661, lr_0 = 4.5313e-04
Loss = 6.7196e-02, PNorm = 62.5414, GNorm = 0.5729, lr_0 = 4.5282e-04
Loss = 8.7431e-02, PNorm = 62.5516, GNorm = 0.5008, lr_0 = 4.5251e-04
Loss = 8.0647e-02, PNorm = 62.5601, GNorm = 0.4965, lr_0 = 4.5220e-04
Loss = 9.1005e-02, PNorm = 62.5688, GNorm = 0.6412, lr_0 = 4.5189e-04
Loss = 8.8189e-02, PNorm = 62.5807, GNorm = 0.5041, lr_0 = 4.5158e-04
Loss = 9.2038e-02, PNorm = 62.5874, GNorm = 0.6651, lr_0 = 4.5127e-04
Loss = 7.8982e-02, PNorm = 62.5944, GNorm = 0.6008, lr_0 = 4.5096e-04
Loss = 8.7632e-02, PNorm = 62.6033, GNorm = 0.7018, lr_0 = 4.5065e-04
Loss = 8.4838e-02, PNorm = 62.6149, GNorm = 0.6813, lr_0 = 4.5034e-04
Loss = 9.2938e-02, PNorm = 62.6282, GNorm = 0.6924, lr_0 = 4.5003e-04
Loss = 8.2543e-02, PNorm = 62.6392, GNorm = 0.5962, lr_0 = 4.4972e-04
Loss = 8.5667e-02, PNorm = 62.6476, GNorm = 0.4925, lr_0 = 4.4942e-04
Loss = 8.4096e-02, PNorm = 62.6558, GNorm = 0.3833, lr_0 = 4.4911e-04
Loss = 9.8024e-02, PNorm = 62.6670, GNorm = 0.8305, lr_0 = 4.4880e-04
Loss = 9.1589e-02, PNorm = 62.6750, GNorm = 0.7983, lr_0 = 4.4849e-04
Loss = 8.2792e-02, PNorm = 62.6863, GNorm = 0.6951, lr_0 = 4.4819e-04
Loss = 8.5501e-02, PNorm = 62.6969, GNorm = 0.6804, lr_0 = 4.4788e-04
Loss = 7.9808e-02, PNorm = 62.7058, GNorm = 0.5651, lr_0 = 4.4757e-04
Loss = 9.2897e-02, PNorm = 62.7133, GNorm = 1.3754, lr_0 = 4.4727e-04
Loss = 1.0670e-01, PNorm = 62.7266, GNorm = 0.8682, lr_0 = 4.4696e-04
Loss = 9.1375e-02, PNorm = 62.7434, GNorm = 0.6085, lr_0 = 4.4665e-04
Loss = 8.6072e-02, PNorm = 62.7546, GNorm = 0.7766, lr_0 = 4.4635e-04
Loss = 9.7110e-02, PNorm = 62.7621, GNorm = 0.6206, lr_0 = 4.4604e-04
Loss = 8.6759e-02, PNorm = 62.7745, GNorm = 0.5238, lr_0 = 4.4574e-04
Loss = 7.3355e-02, PNorm = 62.7826, GNorm = 0.6866, lr_0 = 4.4543e-04
Loss = 1.0339e-01, PNorm = 62.7919, GNorm = 1.4254, lr_0 = 4.4513e-04
Loss = 9.5750e-02, PNorm = 62.8052, GNorm = 0.6400, lr_0 = 4.4482e-04
Loss = 9.5481e-02, PNorm = 62.8194, GNorm = 0.9716, lr_0 = 4.4452e-04
Loss = 9.4352e-02, PNorm = 62.8297, GNorm = 1.1639, lr_0 = 4.4421e-04
Loss = 1.0971e-01, PNorm = 62.8422, GNorm = 0.4565, lr_0 = 4.4391e-04
Loss = 8.2754e-02, PNorm = 62.8511, GNorm = 0.6329, lr_0 = 4.4360e-04
Loss = 8.7953e-02, PNorm = 62.8620, GNorm = 0.6595, lr_0 = 4.4330e-04
Loss = 9.0317e-02, PNorm = 62.8662, GNorm = 0.7947, lr_0 = 4.4299e-04
Loss = 9.0387e-02, PNorm = 62.8732, GNorm = 0.4889, lr_0 = 4.4269e-04
Loss = 8.9198e-02, PNorm = 62.8821, GNorm = 1.4094, lr_0 = 4.4239e-04
Loss = 8.9448e-02, PNorm = 62.8910, GNorm = 1.1170, lr_0 = 4.4209e-04
Loss = 9.9533e-02, PNorm = 62.9043, GNorm = 0.6935, lr_0 = 4.4178e-04
Loss = 7.9036e-02, PNorm = 62.9145, GNorm = 0.7287, lr_0 = 4.4148e-04
Loss = 9.0432e-02, PNorm = 62.9247, GNorm = 0.7927, lr_0 = 4.4118e-04
Loss = 8.7636e-02, PNorm = 62.9352, GNorm = 0.7164, lr_0 = 4.4088e-04
Loss = 1.0334e-01, PNorm = 62.9435, GNorm = 0.7638, lr_0 = 4.4057e-04
Loss = 9.9085e-02, PNorm = 62.9589, GNorm = 0.6108, lr_0 = 4.4027e-04
Loss = 9.4784e-02, PNorm = 62.9712, GNorm = 1.0250, lr_0 = 4.3997e-04
Loss = 8.7750e-02, PNorm = 62.9820, GNorm = 0.9232, lr_0 = 4.3967e-04
Loss = 9.1515e-02, PNorm = 62.9918, GNorm = 0.6740, lr_0 = 4.3937e-04
Validation mae = 0.395630
Epoch 12
Loss = 7.8093e-02, PNorm = 63.0023, GNorm = 0.5304, lr_0 = 4.3907e-04
Loss = 6.3744e-02, PNorm = 63.0117, GNorm = 0.5682, lr_0 = 4.3877e-04
Loss = 7.4563e-02, PNorm = 63.0189, GNorm = 0.6220, lr_0 = 4.3846e-04
Loss = 6.8514e-02, PNorm = 63.0302, GNorm = 0.4835, lr_0 = 4.3816e-04
Loss = 9.4276e-02, PNorm = 63.0435, GNorm = 0.6474, lr_0 = 4.3786e-04
Loss = 8.0765e-02, PNorm = 63.0551, GNorm = 0.5174, lr_0 = 4.3756e-04
Loss = 8.9358e-02, PNorm = 63.0654, GNorm = 0.6992, lr_0 = 4.3726e-04
Loss = 7.5099e-02, PNorm = 63.0771, GNorm = 0.5333, lr_0 = 4.3696e-04
Loss = 8.5712e-02, PNorm = 63.0877, GNorm = 0.7857, lr_0 = 4.3667e-04
Loss = 7.8072e-02, PNorm = 63.1026, GNorm = 0.5050, lr_0 = 4.3637e-04
Loss = 7.5702e-02, PNorm = 63.1171, GNorm = 0.5780, lr_0 = 4.3607e-04
Loss = 8.0172e-02, PNorm = 63.1289, GNorm = 0.6342, lr_0 = 4.3577e-04
Loss = 7.0162e-02, PNorm = 63.1389, GNorm = 0.4343, lr_0 = 4.3547e-04
Loss = 8.3196e-02, PNorm = 63.1495, GNorm = 0.9119, lr_0 = 4.3517e-04
Loss = 7.5333e-02, PNorm = 63.1627, GNorm = 0.6008, lr_0 = 4.3487e-04
Loss = 7.6539e-02, PNorm = 63.1743, GNorm = 0.6138, lr_0 = 4.3458e-04
Loss = 8.5337e-02, PNorm = 63.1836, GNorm = 0.9140, lr_0 = 4.3428e-04
Loss = 8.3190e-02, PNorm = 63.1937, GNorm = 0.5065, lr_0 = 4.3398e-04
Loss = 8.4280e-02, PNorm = 63.2025, GNorm = 0.8543, lr_0 = 4.3368e-04
Loss = 6.4325e-02, PNorm = 63.2085, GNorm = 0.4769, lr_0 = 4.3339e-04
Loss = 8.7112e-02, PNorm = 63.2204, GNorm = 0.6174, lr_0 = 4.3309e-04
Loss = 7.5070e-02, PNorm = 63.2353, GNorm = 0.6898, lr_0 = 4.3279e-04
Loss = 7.3766e-02, PNorm = 63.2439, GNorm = 1.1431, lr_0 = 4.3250e-04
Loss = 7.0301e-02, PNorm = 63.2502, GNorm = 0.4308, lr_0 = 4.3220e-04
Loss = 7.5145e-02, PNorm = 63.2580, GNorm = 1.3813, lr_0 = 4.3190e-04
Loss = 8.9924e-02, PNorm = 63.2700, GNorm = 0.8742, lr_0 = 4.3161e-04
Loss = 8.3373e-02, PNorm = 63.2826, GNorm = 0.7325, lr_0 = 4.3131e-04
Loss = 7.3653e-02, PNorm = 63.2925, GNorm = 0.7249, lr_0 = 4.3102e-04
Loss = 8.1854e-02, PNorm = 63.3020, GNorm = 0.7487, lr_0 = 4.3072e-04
Loss = 7.1012e-02, PNorm = 63.3085, GNorm = 0.6021, lr_0 = 4.3043e-04
Loss = 8.3774e-02, PNorm = 63.3157, GNorm = 0.5634, lr_0 = 4.3013e-04
Loss = 9.2398e-02, PNorm = 63.3269, GNorm = 0.5656, lr_0 = 4.2984e-04
Loss = 7.2650e-02, PNorm = 63.3378, GNorm = 0.5601, lr_0 = 4.2954e-04
Loss = 9.1750e-02, PNorm = 63.3461, GNorm = 0.6401, lr_0 = 4.2925e-04
Loss = 7.8162e-02, PNorm = 63.3587, GNorm = 0.9007, lr_0 = 4.2895e-04
Loss = 8.4404e-02, PNorm = 63.3691, GNorm = 0.8684, lr_0 = 4.2866e-04
Loss = 7.4267e-02, PNorm = 63.3775, GNorm = 0.4539, lr_0 = 4.2837e-04
Loss = 7.5852e-02, PNorm = 63.3857, GNorm = 0.8703, lr_0 = 4.2807e-04
Loss = 7.2504e-02, PNorm = 63.3950, GNorm = 0.7490, lr_0 = 4.2778e-04
Loss = 7.2245e-02, PNorm = 63.4046, GNorm = 0.6561, lr_0 = 4.2749e-04
Loss = 7.9496e-02, PNorm = 63.4147, GNorm = 0.5326, lr_0 = 4.2719e-04
Loss = 7.6118e-02, PNorm = 63.4221, GNorm = 0.5810, lr_0 = 4.2690e-04
Loss = 7.2331e-02, PNorm = 63.4311, GNorm = 0.6595, lr_0 = 4.2661e-04
Loss = 7.2103e-02, PNorm = 63.4370, GNorm = 0.9697, lr_0 = 4.2632e-04
Loss = 8.2543e-02, PNorm = 63.4438, GNorm = 0.7717, lr_0 = 4.2602e-04
Loss = 8.6708e-02, PNorm = 63.4486, GNorm = 0.6142, lr_0 = 4.2573e-04
Loss = 8.7104e-02, PNorm = 63.4601, GNorm = 0.6872, lr_0 = 4.2544e-04
Loss = 9.7041e-02, PNorm = 63.4712, GNorm = 0.4340, lr_0 = 4.2515e-04
Loss = 7.2578e-02, PNorm = 63.4854, GNorm = 0.5789, lr_0 = 4.2486e-04
Loss = 8.4567e-02, PNorm = 63.4936, GNorm = 0.5911, lr_0 = 4.2457e-04
Loss = 7.8550e-02, PNorm = 63.5000, GNorm = 0.6471, lr_0 = 4.2428e-04
Loss = 8.2408e-02, PNorm = 63.5110, GNorm = 0.7680, lr_0 = 4.2399e-04
Loss = 7.5845e-02, PNorm = 63.5197, GNorm = 0.6036, lr_0 = 4.2370e-04
Loss = 8.2776e-02, PNorm = 63.5292, GNorm = 0.5936, lr_0 = 4.2340e-04
Loss = 7.6579e-02, PNorm = 63.5406, GNorm = 0.5495, lr_0 = 4.2311e-04
Loss = 8.5836e-02, PNorm = 63.5519, GNorm = 0.9618, lr_0 = 4.2283e-04
Loss = 7.8303e-02, PNorm = 63.5649, GNorm = 1.0493, lr_0 = 4.2254e-04
Loss = 8.0119e-02, PNorm = 63.5781, GNorm = 0.6777, lr_0 = 4.2225e-04
Loss = 9.0755e-02, PNorm = 63.5911, GNorm = 0.7244, lr_0 = 4.2196e-04
Loss = 9.8902e-02, PNorm = 63.6031, GNorm = 0.6088, lr_0 = 4.2167e-04
Loss = 7.2556e-02, PNorm = 63.6113, GNorm = 0.6389, lr_0 = 4.2138e-04
Loss = 8.0586e-02, PNorm = 63.6166, GNorm = 0.5044, lr_0 = 4.2109e-04
Loss = 7.6763e-02, PNorm = 63.6213, GNorm = 0.4495, lr_0 = 4.2080e-04
Loss = 8.3048e-02, PNorm = 63.6275, GNorm = 0.5774, lr_0 = 4.2051e-04
Loss = 8.5620e-02, PNorm = 63.6307, GNorm = 0.4184, lr_0 = 4.2023e-04
Loss = 7.5232e-02, PNorm = 63.6378, GNorm = 0.6412, lr_0 = 4.1994e-04
Loss = 8.5519e-02, PNorm = 63.6514, GNorm = 0.7423, lr_0 = 4.1965e-04
Loss = 7.3636e-02, PNorm = 63.6626, GNorm = 0.7718, lr_0 = 4.1936e-04
Loss = 8.4883e-02, PNorm = 63.6696, GNorm = 0.5695, lr_0 = 4.1907e-04
Loss = 7.0944e-02, PNorm = 63.6771, GNorm = 0.5541, lr_0 = 4.1879e-04
Loss = 7.2724e-02, PNorm = 63.6877, GNorm = 1.2109, lr_0 = 4.1850e-04
Loss = 8.3362e-02, PNorm = 63.6992, GNorm = 1.0945, lr_0 = 4.1821e-04
Loss = 8.4781e-02, PNorm = 63.7103, GNorm = 0.7100, lr_0 = 4.1793e-04
Loss = 8.7556e-02, PNorm = 63.7210, GNorm = 0.5767, lr_0 = 4.1764e-04
Loss = 8.3929e-02, PNorm = 63.7307, GNorm = 0.4879, lr_0 = 4.1736e-04
Loss = 7.7908e-02, PNorm = 63.7427, GNorm = 0.5137, lr_0 = 4.1707e-04
Loss = 8.9403e-02, PNorm = 63.7483, GNorm = 0.8166, lr_0 = 4.1678e-04
Loss = 8.0838e-02, PNorm = 63.7565, GNorm = 0.5631, lr_0 = 4.1650e-04
Loss = 9.0837e-02, PNorm = 63.7652, GNorm = 0.5210, lr_0 = 4.1621e-04
Loss = 7.3463e-02, PNorm = 63.7769, GNorm = 0.6127, lr_0 = 4.1593e-04
Loss = 7.7757e-02, PNorm = 63.7889, GNorm = 0.7137, lr_0 = 4.1564e-04
Loss = 7.9183e-02, PNorm = 63.7996, GNorm = 0.8158, lr_0 = 4.1536e-04
Loss = 7.1529e-02, PNorm = 63.8060, GNorm = 0.5960, lr_0 = 4.1507e-04
Loss = 7.8121e-02, PNorm = 63.8153, GNorm = 0.8168, lr_0 = 4.1479e-04
Loss = 7.3658e-02, PNorm = 63.8246, GNorm = 0.4824, lr_0 = 4.1450e-04
Loss = 7.1833e-02, PNorm = 63.8313, GNorm = 0.5147, lr_0 = 4.1422e-04
Loss = 8.0923e-02, PNorm = 63.8377, GNorm = 0.7448, lr_0 = 4.1394e-04
Loss = 8.0291e-02, PNorm = 63.8440, GNorm = 0.5775, lr_0 = 4.1365e-04
Loss = 8.8532e-02, PNorm = 63.8518, GNorm = 0.5420, lr_0 = 4.1337e-04
Loss = 7.4733e-02, PNorm = 63.8606, GNorm = 0.5496, lr_0 = 4.1309e-04
Loss = 8.9877e-02, PNorm = 63.8711, GNorm = 0.6056, lr_0 = 4.1280e-04
Loss = 8.4614e-02, PNorm = 63.8804, GNorm = 0.6083, lr_0 = 4.1252e-04
Loss = 8.2868e-02, PNorm = 63.8897, GNorm = 0.6826, lr_0 = 4.1224e-04
Loss = 9.1390e-02, PNorm = 63.8999, GNorm = 0.7993, lr_0 = 4.1196e-04
Loss = 8.8741e-02, PNorm = 63.9100, GNorm = 0.5822, lr_0 = 4.1167e-04
Loss = 7.8028e-02, PNorm = 63.9207, GNorm = 0.5131, lr_0 = 4.1139e-04
Loss = 8.6084e-02, PNorm = 63.9302, GNorm = 0.4958, lr_0 = 4.1111e-04
Loss = 8.7898e-02, PNorm = 63.9411, GNorm = 1.0985, lr_0 = 4.1083e-04
Loss = 6.5107e-02, PNorm = 63.9520, GNorm = 0.7653, lr_0 = 4.1055e-04
Loss = 8.8518e-02, PNorm = 63.9590, GNorm = 0.7116, lr_0 = 4.1027e-04
Loss = 8.9831e-02, PNorm = 63.9651, GNorm = 0.6639, lr_0 = 4.0998e-04
Loss = 7.3866e-02, PNorm = 63.9720, GNorm = 0.7245, lr_0 = 4.0970e-04
Loss = 9.2680e-02, PNorm = 63.9828, GNorm = 1.3285, lr_0 = 4.0942e-04
Loss = 7.9518e-02, PNorm = 63.9921, GNorm = 0.6707, lr_0 = 4.0914e-04
Loss = 7.7880e-02, PNorm = 63.9998, GNorm = 0.5638, lr_0 = 4.0886e-04
Loss = 6.7511e-02, PNorm = 64.0065, GNorm = 0.5110, lr_0 = 4.0858e-04
Loss = 8.0034e-02, PNorm = 64.0152, GNorm = 0.7507, lr_0 = 4.0830e-04
Loss = 7.4419e-02, PNorm = 64.0256, GNorm = 0.7364, lr_0 = 4.0802e-04
Loss = 8.3540e-02, PNorm = 64.0329, GNorm = 0.6173, lr_0 = 4.0774e-04
Loss = 7.1717e-02, PNorm = 64.0428, GNorm = 0.6344, lr_0 = 4.0746e-04
Loss = 7.7973e-02, PNorm = 64.0493, GNorm = 0.9621, lr_0 = 4.0718e-04
Loss = 8.5315e-02, PNorm = 64.0575, GNorm = 0.5048, lr_0 = 4.0691e-04
Loss = 7.0495e-02, PNorm = 64.0626, GNorm = 0.6613, lr_0 = 4.0663e-04
Loss = 7.3074e-02, PNorm = 64.0691, GNorm = 0.5340, lr_0 = 4.0635e-04
Loss = 7.9123e-02, PNorm = 64.0773, GNorm = 0.5272, lr_0 = 4.0607e-04
Loss = 8.1444e-02, PNorm = 64.0871, GNorm = 1.1099, lr_0 = 4.0579e-04
Loss = 9.1293e-02, PNorm = 64.0999, GNorm = 0.8146, lr_0 = 4.0551e-04
Loss = 9.2223e-02, PNorm = 64.1108, GNorm = 0.7525, lr_0 = 4.0524e-04
Loss = 7.7595e-02, PNorm = 64.1164, GNorm = 0.6343, lr_0 = 4.0496e-04
Loss = 8.0360e-02, PNorm = 64.1217, GNorm = 0.8626, lr_0 = 4.0468e-04
Validation mae = 0.391493
Epoch 13
Loss = 6.5994e-02, PNorm = 64.1299, GNorm = 0.5934, lr_0 = 4.0440e-04
Loss = 6.9406e-02, PNorm = 64.1394, GNorm = 0.7035, lr_0 = 4.0413e-04
Loss = 7.3554e-02, PNorm = 64.1492, GNorm = 0.6506, lr_0 = 4.0385e-04
Loss = 7.4627e-02, PNorm = 64.1610, GNorm = 0.6293, lr_0 = 4.0357e-04
Loss = 6.5130e-02, PNorm = 64.1692, GNorm = 0.7720, lr_0 = 4.0330e-04
Loss = 6.8508e-02, PNorm = 64.1807, GNorm = 0.6195, lr_0 = 4.0302e-04
Loss = 6.3340e-02, PNorm = 64.1879, GNorm = 0.5963, lr_0 = 4.0274e-04
Loss = 7.1466e-02, PNorm = 64.1958, GNorm = 0.6497, lr_0 = 4.0247e-04
Loss = 7.1523e-02, PNorm = 64.2042, GNorm = 0.8395, lr_0 = 4.0219e-04
Loss = 7.0460e-02, PNorm = 64.2114, GNorm = 0.4570, lr_0 = 4.0192e-04
Loss = 7.2650e-02, PNorm = 64.2201, GNorm = 1.0632, lr_0 = 4.0164e-04
Loss = 7.6657e-02, PNorm = 64.2343, GNorm = 0.6040, lr_0 = 4.0137e-04
Loss = 7.2838e-02, PNorm = 64.2464, GNorm = 0.7696, lr_0 = 4.0109e-04
Loss = 8.1013e-02, PNorm = 64.2596, GNorm = 0.5596, lr_0 = 4.0082e-04
Loss = 7.0157e-02, PNorm = 64.2738, GNorm = 0.5881, lr_0 = 4.0054e-04
Loss = 7.0371e-02, PNorm = 64.2851, GNorm = 0.7965, lr_0 = 4.0027e-04
Loss = 8.6624e-02, PNorm = 64.2985, GNorm = 0.5301, lr_0 = 3.9999e-04
Loss = 6.6903e-02, PNorm = 64.3109, GNorm = 0.4401, lr_0 = 3.9972e-04
Loss = 6.8085e-02, PNorm = 64.3188, GNorm = 0.4939, lr_0 = 3.9945e-04
Loss = 7.8096e-02, PNorm = 64.3294, GNorm = 0.6928, lr_0 = 3.9917e-04
Loss = 6.8313e-02, PNorm = 64.3378, GNorm = 0.3679, lr_0 = 3.9890e-04
Loss = 7.1289e-02, PNorm = 64.3464, GNorm = 0.5097, lr_0 = 3.9863e-04
Loss = 8.7512e-02, PNorm = 64.3557, GNorm = 0.5492, lr_0 = 3.9835e-04
Loss = 6.7884e-02, PNorm = 64.3644, GNorm = 0.4796, lr_0 = 3.9808e-04
Loss = 6.7544e-02, PNorm = 64.3755, GNorm = 0.6526, lr_0 = 3.9781e-04
Loss = 7.9359e-02, PNorm = 64.3856, GNorm = 0.5772, lr_0 = 3.9753e-04
Loss = 7.0106e-02, PNorm = 64.3965, GNorm = 0.4611, lr_0 = 3.9726e-04
Loss = 7.1207e-02, PNorm = 64.4041, GNorm = 0.5031, lr_0 = 3.9699e-04
Loss = 7.6079e-02, PNorm = 64.4116, GNorm = 0.5446, lr_0 = 3.9672e-04
Loss = 7.4408e-02, PNorm = 64.4189, GNorm = 0.5181, lr_0 = 3.9645e-04
Loss = 7.4507e-02, PNorm = 64.4300, GNorm = 0.5482, lr_0 = 3.9617e-04
Loss = 7.6423e-02, PNorm = 64.4385, GNorm = 0.5837, lr_0 = 3.9590e-04
Loss = 8.0296e-02, PNorm = 64.4473, GNorm = 0.7265, lr_0 = 3.9563e-04
Loss = 7.1931e-02, PNorm = 64.4569, GNorm = 0.6369, lr_0 = 3.9536e-04
Loss = 6.9992e-02, PNorm = 64.4662, GNorm = 0.6976, lr_0 = 3.9509e-04
Loss = 7.3518e-02, PNorm = 64.4722, GNorm = 0.6458, lr_0 = 3.9482e-04
Loss = 6.8750e-02, PNorm = 64.4779, GNorm = 0.6605, lr_0 = 3.9455e-04
Loss = 6.4318e-02, PNorm = 64.4838, GNorm = 0.4329, lr_0 = 3.9428e-04
Loss = 8.2094e-02, PNorm = 64.4915, GNorm = 0.6555, lr_0 = 3.9401e-04
Loss = 7.6130e-02, PNorm = 64.4989, GNorm = 0.8718, lr_0 = 3.9374e-04
Loss = 7.0268e-02, PNorm = 64.5070, GNorm = 0.5913, lr_0 = 3.9347e-04
Loss = 7.6624e-02, PNorm = 64.5164, GNorm = 0.5675, lr_0 = 3.9320e-04
Loss = 8.0630e-02, PNorm = 64.5232, GNorm = 0.6041, lr_0 = 3.9293e-04
Loss = 6.9202e-02, PNorm = 64.5296, GNorm = 0.8173, lr_0 = 3.9266e-04
Loss = 7.6235e-02, PNorm = 64.5369, GNorm = 0.8310, lr_0 = 3.9239e-04
Loss = 8.0940e-02, PNorm = 64.5483, GNorm = 1.0678, lr_0 = 3.9212e-04
Loss = 7.1481e-02, PNorm = 64.5578, GNorm = 0.4921, lr_0 = 3.9185e-04
Loss = 9.0860e-02, PNorm = 64.5682, GNorm = 0.8461, lr_0 = 3.9159e-04
Loss = 7.0117e-02, PNorm = 64.5748, GNorm = 0.5887, lr_0 = 3.9132e-04
Loss = 7.9767e-02, PNorm = 64.5842, GNorm = 0.7128, lr_0 = 3.9105e-04
Loss = 6.4144e-02, PNorm = 64.5941, GNorm = 0.6893, lr_0 = 3.9078e-04
Loss = 6.5043e-02, PNorm = 64.6005, GNorm = 0.3570, lr_0 = 3.9051e-04
Loss = 7.2554e-02, PNorm = 64.6080, GNorm = 0.6867, lr_0 = 3.9025e-04
Loss = 7.9546e-02, PNorm = 64.6186, GNorm = 0.5603, lr_0 = 3.8998e-04
Loss = 7.3309e-02, PNorm = 64.6283, GNorm = 0.5598, lr_0 = 3.8971e-04
Loss = 7.2023e-02, PNorm = 64.6362, GNorm = 0.5487, lr_0 = 3.8945e-04
Loss = 7.4412e-02, PNorm = 64.6452, GNorm = 0.5719, lr_0 = 3.8918e-04
Loss = 7.6805e-02, PNorm = 64.6531, GNorm = 0.6426, lr_0 = 3.8891e-04
Loss = 7.4403e-02, PNorm = 64.6631, GNorm = 0.6076, lr_0 = 3.8865e-04
Loss = 7.3488e-02, PNorm = 64.6686, GNorm = 0.5330, lr_0 = 3.8838e-04
Loss = 6.9483e-02, PNorm = 64.6762, GNorm = 0.3720, lr_0 = 3.8811e-04
Loss = 6.9041e-02, PNorm = 64.6856, GNorm = 0.6337, lr_0 = 3.8785e-04
Loss = 7.2260e-02, PNorm = 64.6922, GNorm = 0.7386, lr_0 = 3.8758e-04
Loss = 7.6337e-02, PNorm = 64.6976, GNorm = 0.8509, lr_0 = 3.8732e-04
Loss = 8.3833e-02, PNorm = 64.7100, GNorm = 0.5815, lr_0 = 3.8705e-04
Loss = 8.8622e-02, PNorm = 64.7205, GNorm = 0.6097, lr_0 = 3.8679e-04
Loss = 8.2885e-02, PNorm = 64.7303, GNorm = 0.6432, lr_0 = 3.8652e-04
Loss = 7.9557e-02, PNorm = 64.7369, GNorm = 0.4845, lr_0 = 3.8626e-04
Loss = 6.6982e-02, PNorm = 64.7453, GNorm = 0.4068, lr_0 = 3.8599e-04
Loss = 7.0581e-02, PNorm = 64.7529, GNorm = 0.4114, lr_0 = 3.8573e-04
Loss = 7.3877e-02, PNorm = 64.7613, GNorm = 0.9294, lr_0 = 3.8546e-04
Loss = 7.1610e-02, PNorm = 64.7688, GNorm = 0.6584, lr_0 = 3.8520e-04
Loss = 7.3008e-02, PNorm = 64.7775, GNorm = 0.5641, lr_0 = 3.8493e-04
Loss = 7.1764e-02, PNorm = 64.7880, GNorm = 0.6470, lr_0 = 3.8467e-04
Loss = 8.0586e-02, PNorm = 64.7947, GNorm = 0.5320, lr_0 = 3.8441e-04
Loss = 6.6048e-02, PNorm = 64.7981, GNorm = 0.4403, lr_0 = 3.8414e-04
Loss = 8.5704e-02, PNorm = 64.8015, GNorm = 0.6238, lr_0 = 3.8388e-04
Loss = 8.6113e-02, PNorm = 64.8083, GNorm = 0.4962, lr_0 = 3.8362e-04
Loss = 7.6981e-02, PNorm = 64.8151, GNorm = 0.5927, lr_0 = 3.8336e-04
Loss = 7.9581e-02, PNorm = 64.8208, GNorm = 0.7320, lr_0 = 3.8309e-04
Loss = 7.7198e-02, PNorm = 64.8265, GNorm = 0.4766, lr_0 = 3.8283e-04
Loss = 6.8516e-02, PNorm = 64.8352, GNorm = 0.4829, lr_0 = 3.8257e-04
Loss = 7.3629e-02, PNorm = 64.8438, GNorm = 0.7958, lr_0 = 3.8231e-04
Loss = 7.0436e-02, PNorm = 64.8492, GNorm = 0.7829, lr_0 = 3.8204e-04
Loss = 6.3577e-02, PNorm = 64.8551, GNorm = 0.4681, lr_0 = 3.8178e-04
Loss = 7.5268e-02, PNorm = 64.8639, GNorm = 0.6993, lr_0 = 3.8152e-04
Loss = 7.8948e-02, PNorm = 64.8745, GNorm = 0.7045, lr_0 = 3.8126e-04
Loss = 7.9748e-02, PNorm = 64.8823, GNorm = 0.8316, lr_0 = 3.8100e-04
Loss = 7.8140e-02, PNorm = 64.8920, GNorm = 0.8408, lr_0 = 3.8074e-04
Loss = 7.0393e-02, PNorm = 64.9016, GNorm = 0.6290, lr_0 = 3.8048e-04
Loss = 8.7034e-02, PNorm = 64.9129, GNorm = 0.8766, lr_0 = 3.8022e-04
Loss = 7.9584e-02, PNorm = 64.9189, GNorm = 0.6842, lr_0 = 3.7995e-04
Loss = 7.1927e-02, PNorm = 64.9241, GNorm = 0.6090, lr_0 = 3.7969e-04
Loss = 7.4668e-02, PNorm = 64.9302, GNorm = 0.9293, lr_0 = 3.7943e-04
Loss = 7.9081e-02, PNorm = 64.9366, GNorm = 0.5693, lr_0 = 3.7917e-04
Loss = 7.1315e-02, PNorm = 64.9462, GNorm = 0.6525, lr_0 = 3.7891e-04
Loss = 7.7512e-02, PNorm = 64.9548, GNorm = 0.6083, lr_0 = 3.7866e-04
Loss = 7.9675e-02, PNorm = 64.9615, GNorm = 1.2233, lr_0 = 3.7840e-04
Loss = 7.5232e-02, PNorm = 64.9686, GNorm = 0.8604, lr_0 = 3.7814e-04
Loss = 7.1900e-02, PNorm = 64.9765, GNorm = 0.4468, lr_0 = 3.7788e-04
Loss = 6.0607e-02, PNorm = 64.9814, GNorm = 0.5913, lr_0 = 3.7762e-04
Loss = 7.6094e-02, PNorm = 64.9895, GNorm = 0.7225, lr_0 = 3.7736e-04
Loss = 7.0921e-02, PNorm = 64.9972, GNorm = 0.5161, lr_0 = 3.7710e-04
Loss = 9.5146e-02, PNorm = 65.0063, GNorm = 0.9379, lr_0 = 3.7684e-04
Loss = 8.9649e-02, PNorm = 65.0147, GNorm = 0.7366, lr_0 = 3.7659e-04
Loss = 7.6900e-02, PNorm = 65.0226, GNorm = 0.6440, lr_0 = 3.7633e-04
Loss = 6.5768e-02, PNorm = 65.0300, GNorm = 0.6040, lr_0 = 3.7607e-04
Loss = 7.0917e-02, PNorm = 65.0379, GNorm = 0.5542, lr_0 = 3.7581e-04
Loss = 7.9081e-02, PNorm = 65.0466, GNorm = 0.5587, lr_0 = 3.7555e-04
Loss = 7.8655e-02, PNorm = 65.0527, GNorm = 0.8079, lr_0 = 3.7530e-04
Loss = 7.1814e-02, PNorm = 65.0599, GNorm = 0.8038, lr_0 = 3.7504e-04
Loss = 7.0264e-02, PNorm = 65.0666, GNorm = 0.6620, lr_0 = 3.7478e-04
Loss = 7.1767e-02, PNorm = 65.0778, GNorm = 0.5711, lr_0 = 3.7453e-04
Loss = 8.2333e-02, PNorm = 65.0843, GNorm = 0.7261, lr_0 = 3.7427e-04
Loss = 7.8920e-02, PNorm = 65.0911, GNorm = 1.1131, lr_0 = 3.7401e-04
Loss = 7.7715e-02, PNorm = 65.0998, GNorm = 0.9697, lr_0 = 3.7376e-04
Loss = 7.8465e-02, PNorm = 65.1095, GNorm = 0.8885, lr_0 = 3.7350e-04
Loss = 7.7373e-02, PNorm = 65.1185, GNorm = 0.4900, lr_0 = 3.7325e-04
Loss = 8.2469e-02, PNorm = 65.1281, GNorm = 0.5270, lr_0 = 3.7299e-04
Loss = 6.7780e-02, PNorm = 65.1354, GNorm = 0.4229, lr_0 = 3.7273e-04
Validation mae = 0.392101
Epoch 14
Loss = 6.0987e-02, PNorm = 65.1430, GNorm = 0.5267, lr_0 = 3.7248e-04
Loss = 6.2504e-02, PNorm = 65.1499, GNorm = 0.4703, lr_0 = 3.7222e-04
Loss = 6.9612e-02, PNorm = 65.1592, GNorm = 0.7301, lr_0 = 3.7197e-04
Loss = 5.5639e-02, PNorm = 65.1675, GNorm = 0.6196, lr_0 = 3.7171e-04
Loss = 6.0567e-02, PNorm = 65.1753, GNorm = 0.5919, lr_0 = 3.7146e-04
Loss = 6.0790e-02, PNorm = 65.1798, GNorm = 0.5175, lr_0 = 3.7120e-04
Loss = 6.0599e-02, PNorm = 65.1877, GNorm = 0.5052, lr_0 = 3.7095e-04
Loss = 6.1104e-02, PNorm = 65.1945, GNorm = 0.6831, lr_0 = 3.7070e-04
Loss = 6.8603e-02, PNorm = 65.2017, GNorm = 0.6211, lr_0 = 3.7044e-04
Loss = 7.2816e-02, PNorm = 65.2118, GNorm = 0.6490, lr_0 = 3.7019e-04
Loss = 6.3762e-02, PNorm = 65.2213, GNorm = 0.5046, lr_0 = 3.6993e-04
Loss = 7.4535e-02, PNorm = 65.2256, GNorm = 0.6397, lr_0 = 3.6968e-04
Loss = 5.8376e-02, PNorm = 65.2301, GNorm = 0.5105, lr_0 = 3.6943e-04
Loss = 5.6564e-02, PNorm = 65.2340, GNorm = 0.4487, lr_0 = 3.6917e-04
Loss = 6.5217e-02, PNorm = 65.2396, GNorm = 0.5946, lr_0 = 3.6892e-04
Loss = 6.1108e-02, PNorm = 65.2465, GNorm = 0.5765, lr_0 = 3.6867e-04
Loss = 6.4874e-02, PNorm = 65.2535, GNorm = 0.4911, lr_0 = 3.6842e-04
Loss = 7.2577e-02, PNorm = 65.2624, GNorm = 0.6910, lr_0 = 3.6816e-04
Loss = 6.3912e-02, PNorm = 65.2740, GNorm = 0.4976, lr_0 = 3.6791e-04
Loss = 7.2500e-02, PNorm = 65.2865, GNorm = 0.6577, lr_0 = 3.6766e-04
Loss = 7.3794e-02, PNorm = 65.2990, GNorm = 0.6587, lr_0 = 3.6741e-04
Loss = 6.9823e-02, PNorm = 65.3064, GNorm = 0.5580, lr_0 = 3.6716e-04
Loss = 6.9058e-02, PNorm = 65.3107, GNorm = 0.5189, lr_0 = 3.6690e-04
Loss = 6.9951e-02, PNorm = 65.3173, GNorm = 0.5119, lr_0 = 3.6665e-04
Loss = 6.6845e-02, PNorm = 65.3230, GNorm = 0.8414, lr_0 = 3.6640e-04
Loss = 6.7244e-02, PNorm = 65.3302, GNorm = 0.7169, lr_0 = 3.6615e-04
Loss = 6.5119e-02, PNorm = 65.3385, GNorm = 0.3943, lr_0 = 3.6590e-04
Loss = 6.4943e-02, PNorm = 65.3479, GNorm = 0.5142, lr_0 = 3.6565e-04
Loss = 7.0699e-02, PNorm = 65.3581, GNorm = 0.4668, lr_0 = 3.6540e-04
Loss = 7.6414e-02, PNorm = 65.3686, GNorm = 0.6907, lr_0 = 3.6515e-04
Loss = 7.7317e-02, PNorm = 65.3752, GNorm = 0.9470, lr_0 = 3.6490e-04
Loss = 6.9980e-02, PNorm = 65.3841, GNorm = 0.4990, lr_0 = 3.6465e-04
Loss = 6.7532e-02, PNorm = 65.3893, GNorm = 0.5408, lr_0 = 3.6440e-04
Loss = 6.7801e-02, PNorm = 65.3960, GNorm = 1.2524, lr_0 = 3.6415e-04
Loss = 6.9185e-02, PNorm = 65.4050, GNorm = 0.6746, lr_0 = 3.6390e-04
Loss = 7.8010e-02, PNorm = 65.4151, GNorm = 0.6701, lr_0 = 3.6365e-04
Loss = 7.2744e-02, PNorm = 65.4240, GNorm = 0.8016, lr_0 = 3.6340e-04
Loss = 6.5970e-02, PNorm = 65.4311, GNorm = 0.5245, lr_0 = 3.6315e-04
Loss = 6.3507e-02, PNorm = 65.4375, GNorm = 0.5258, lr_0 = 3.6290e-04
Loss = 7.7280e-02, PNorm = 65.4459, GNorm = 0.6939, lr_0 = 3.6266e-04
Loss = 7.2395e-02, PNorm = 65.4537, GNorm = 0.8883, lr_0 = 3.6241e-04
Loss = 7.9066e-02, PNorm = 65.4611, GNorm = 0.9069, lr_0 = 3.6216e-04
Loss = 6.8393e-02, PNorm = 65.4697, GNorm = 0.5863, lr_0 = 3.6191e-04
Loss = 8.0940e-02, PNorm = 65.4783, GNorm = 0.6269, lr_0 = 3.6166e-04
Loss = 7.3606e-02, PNorm = 65.4898, GNorm = 0.7295, lr_0 = 3.6141e-04
Loss = 6.9389e-02, PNorm = 65.4942, GNorm = 0.4521, lr_0 = 3.6117e-04
Loss = 6.9249e-02, PNorm = 65.5018, GNorm = 0.6342, lr_0 = 3.6092e-04
Loss = 6.9436e-02, PNorm = 65.5120, GNorm = 0.7483, lr_0 = 3.6067e-04
Loss = 7.4149e-02, PNorm = 65.5198, GNorm = 0.6846, lr_0 = 3.6043e-04
Loss = 7.1654e-02, PNorm = 65.5266, GNorm = 0.7781, lr_0 = 3.6018e-04
Loss = 7.0198e-02, PNorm = 65.5361, GNorm = 0.6035, lr_0 = 3.5993e-04
Loss = 6.6911e-02, PNorm = 65.5444, GNorm = 0.7614, lr_0 = 3.5969e-04
Loss = 6.8034e-02, PNorm = 65.5559, GNorm = 0.6082, lr_0 = 3.5944e-04
Loss = 6.1975e-02, PNorm = 65.5625, GNorm = 0.5335, lr_0 = 3.5919e-04
Loss = 7.9706e-02, PNorm = 65.5709, GNorm = 0.6800, lr_0 = 3.5895e-04
Loss = 7.1370e-02, PNorm = 65.5832, GNorm = 0.4790, lr_0 = 3.5870e-04
Loss = 6.3728e-02, PNorm = 65.5939, GNorm = 0.5316, lr_0 = 3.5845e-04
Loss = 6.8688e-02, PNorm = 65.6020, GNorm = 0.8692, lr_0 = 3.5821e-04
Loss = 6.8868e-02, PNorm = 65.6064, GNorm = 0.4532, lr_0 = 3.5796e-04
Loss = 6.9321e-02, PNorm = 65.6119, GNorm = 0.6029, lr_0 = 3.5772e-04
Loss = 6.7578e-02, PNorm = 65.6173, GNorm = 0.5125, lr_0 = 3.5747e-04
Loss = 6.5755e-02, PNorm = 65.6205, GNorm = 0.6147, lr_0 = 3.5723e-04
Loss = 6.2127e-02, PNorm = 65.6261, GNorm = 0.7029, lr_0 = 3.5698e-04
Loss = 6.6022e-02, PNorm = 65.6311, GNorm = 0.5445, lr_0 = 3.5674e-04
Loss = 7.3853e-02, PNorm = 65.6358, GNorm = 0.8163, lr_0 = 3.5650e-04
Loss = 7.8095e-02, PNorm = 65.6436, GNorm = 0.6040, lr_0 = 3.5625e-04
Loss = 7.9962e-02, PNorm = 65.6497, GNorm = 0.7065, lr_0 = 3.5601e-04
Loss = 7.0851e-02, PNorm = 65.6535, GNorm = 0.8380, lr_0 = 3.5576e-04
Loss = 7.9044e-02, PNorm = 65.6588, GNorm = 0.5167, lr_0 = 3.5552e-04
Loss = 6.9992e-02, PNorm = 65.6656, GNorm = 0.8907, lr_0 = 3.5528e-04
Loss = 6.5978e-02, PNorm = 65.6770, GNorm = 0.6346, lr_0 = 3.5503e-04
Loss = 6.5639e-02, PNorm = 65.6859, GNorm = 1.0266, lr_0 = 3.5479e-04
Loss = 8.1448e-02, PNorm = 65.6951, GNorm = 1.2148, lr_0 = 3.5455e-04
Loss = 6.4989e-02, PNorm = 65.7026, GNorm = 0.4957, lr_0 = 3.5430e-04
Loss = 6.0471e-02, PNorm = 65.7085, GNorm = 0.5676, lr_0 = 3.5406e-04
Loss = 8.2497e-02, PNorm = 65.7160, GNorm = 0.7385, lr_0 = 3.5382e-04
Loss = 7.0474e-02, PNorm = 65.7277, GNorm = 0.5198, lr_0 = 3.5358e-04
Loss = 9.0008e-02, PNorm = 65.7373, GNorm = 0.9238, lr_0 = 3.5333e-04
Loss = 7.1277e-02, PNorm = 65.7482, GNorm = 0.4744, lr_0 = 3.5309e-04
Loss = 6.0016e-02, PNorm = 65.7540, GNorm = 0.5698, lr_0 = 3.5285e-04
Loss = 7.4583e-02, PNorm = 65.7618, GNorm = 0.6065, lr_0 = 3.5261e-04
Loss = 7.3429e-02, PNorm = 65.7666, GNorm = 0.4463, lr_0 = 3.5237e-04
Loss = 6.5661e-02, PNorm = 65.7733, GNorm = 0.6039, lr_0 = 3.5212e-04
Loss = 6.9735e-02, PNorm = 65.7831, GNorm = 0.6262, lr_0 = 3.5188e-04
Loss = 6.9703e-02, PNorm = 65.7910, GNorm = 0.5173, lr_0 = 3.5164e-04
Loss = 7.1238e-02, PNorm = 65.7983, GNorm = 0.8713, lr_0 = 3.5140e-04
Loss = 7.6405e-02, PNorm = 65.8064, GNorm = 0.6959, lr_0 = 3.5116e-04
Loss = 6.5127e-02, PNorm = 65.8152, GNorm = 0.6309, lr_0 = 3.5092e-04
Loss = 6.6860e-02, PNorm = 65.8220, GNorm = 0.5489, lr_0 = 3.5068e-04
Loss = 7.3029e-02, PNorm = 65.8294, GNorm = 0.4945, lr_0 = 3.5044e-04
Loss = 7.1148e-02, PNorm = 65.8346, GNorm = 0.7344, lr_0 = 3.5020e-04
Loss = 6.8427e-02, PNorm = 65.8399, GNorm = 0.4430, lr_0 = 3.4996e-04
Loss = 6.6418e-02, PNorm = 65.8437, GNorm = 0.5220, lr_0 = 3.4972e-04
Loss = 7.0189e-02, PNorm = 65.8475, GNorm = 0.4881, lr_0 = 3.4948e-04
Loss = 7.0184e-02, PNorm = 65.8520, GNorm = 0.9379, lr_0 = 3.4924e-04
Loss = 6.7090e-02, PNorm = 65.8578, GNorm = 0.6521, lr_0 = 3.4900e-04
Loss = 5.8069e-02, PNorm = 65.8630, GNorm = 0.8355, lr_0 = 3.4876e-04
Loss = 7.3910e-02, PNorm = 65.8689, GNorm = 0.4730, lr_0 = 3.4852e-04
Loss = 5.9894e-02, PNorm = 65.8758, GNorm = 0.4318, lr_0 = 3.4828e-04
Loss = 7.8120e-02, PNorm = 65.8826, GNorm = 0.5611, lr_0 = 3.4805e-04
Loss = 7.3292e-02, PNorm = 65.8883, GNorm = 0.9379, lr_0 = 3.4781e-04
Loss = 6.1504e-02, PNorm = 65.8948, GNorm = 0.6592, lr_0 = 3.4757e-04
Loss = 7.4644e-02, PNorm = 65.8969, GNorm = 0.5023, lr_0 = 3.4733e-04
Loss = 7.7532e-02, PNorm = 65.9026, GNorm = 0.6010, lr_0 = 3.4709e-04
Loss = 6.2155e-02, PNorm = 65.9131, GNorm = 0.7122, lr_0 = 3.4686e-04
Loss = 8.0853e-02, PNorm = 65.9236, GNorm = 0.8869, lr_0 = 3.4662e-04
Loss = 6.7308e-02, PNorm = 65.9316, GNorm = 0.4962, lr_0 = 3.4638e-04
Loss = 7.3719e-02, PNorm = 65.9399, GNorm = 0.6029, lr_0 = 3.4614e-04
Loss = 7.8940e-02, PNorm = 65.9452, GNorm = 0.9927, lr_0 = 3.4591e-04
Loss = 7.4209e-02, PNorm = 65.9521, GNorm = 0.9364, lr_0 = 3.4567e-04
Loss = 6.1426e-02, PNorm = 65.9564, GNorm = 0.5760, lr_0 = 3.4543e-04
Loss = 7.5789e-02, PNorm = 65.9597, GNorm = 0.4268, lr_0 = 3.4520e-04
Loss = 6.6252e-02, PNorm = 65.9669, GNorm = 0.6601, lr_0 = 3.4496e-04
Loss = 8.8607e-02, PNorm = 65.9778, GNorm = 0.6623, lr_0 = 3.4472e-04
Loss = 7.4490e-02, PNorm = 65.9883, GNorm = 0.4655, lr_0 = 3.4449e-04
Loss = 6.6650e-02, PNorm = 65.9945, GNorm = 0.3650, lr_0 = 3.4425e-04
Loss = 8.2136e-02, PNorm = 65.9997, GNorm = 0.5243, lr_0 = 3.4402e-04
Loss = 6.8567e-02, PNorm = 66.0080, GNorm = 0.5602, lr_0 = 3.4378e-04
Loss = 6.5096e-02, PNorm = 66.0143, GNorm = 0.6479, lr_0 = 3.4354e-04
Loss = 6.7954e-02, PNorm = 66.0174, GNorm = 0.4863, lr_0 = 3.4331e-04
Validation mae = 0.388016
Epoch 15
Loss = 6.2776e-02, PNorm = 66.0261, GNorm = 0.6654, lr_0 = 3.4307e-04
Loss = 6.2319e-02, PNorm = 66.0361, GNorm = 0.3899, lr_0 = 3.4284e-04
Loss = 6.3883e-02, PNorm = 66.0428, GNorm = 0.6549, lr_0 = 3.4260e-04
Loss = 5.4905e-02, PNorm = 66.0483, GNorm = 0.3981, lr_0 = 3.4237e-04
Loss = 6.0742e-02, PNorm = 66.0527, GNorm = 0.3729, lr_0 = 3.4213e-04
Loss = 5.8349e-02, PNorm = 66.0565, GNorm = 0.6911, lr_0 = 3.4190e-04
Loss = 6.2629e-02, PNorm = 66.0623, GNorm = 0.5400, lr_0 = 3.4167e-04
Loss = 5.9379e-02, PNorm = 66.0706, GNorm = 0.6755, lr_0 = 3.4143e-04
Loss = 5.8050e-02, PNorm = 66.0772, GNorm = 0.5459, lr_0 = 3.4120e-04
Loss = 6.0348e-02, PNorm = 66.0866, GNorm = 0.4579, lr_0 = 3.4096e-04
Loss = 6.0716e-02, PNorm = 66.0935, GNorm = 0.7149, lr_0 = 3.4073e-04
Loss = 5.4121e-02, PNorm = 66.1006, GNorm = 0.7407, lr_0 = 3.4050e-04
Loss = 6.3112e-02, PNorm = 66.1096, GNorm = 0.5020, lr_0 = 3.4026e-04
Loss = 5.7642e-02, PNorm = 66.1163, GNorm = 0.3892, lr_0 = 3.4003e-04
Loss = 5.6538e-02, PNorm = 66.1196, GNorm = 0.4586, lr_0 = 3.3980e-04
Loss = 6.4028e-02, PNorm = 66.1232, GNorm = 0.7041, lr_0 = 3.3956e-04
Loss = 5.7753e-02, PNorm = 66.1278, GNorm = 0.8402, lr_0 = 3.3933e-04
Loss = 6.2344e-02, PNorm = 66.1385, GNorm = 0.6869, lr_0 = 3.3910e-04
Loss = 6.8809e-02, PNorm = 66.1454, GNorm = 0.4887, lr_0 = 3.3887e-04
Loss = 6.4825e-02, PNorm = 66.1495, GNorm = 0.6678, lr_0 = 3.3864e-04
Loss = 6.3902e-02, PNorm = 66.1529, GNorm = 0.4905, lr_0 = 3.3840e-04
Loss = 6.5663e-02, PNorm = 66.1605, GNorm = 0.3395, lr_0 = 3.3817e-04
Loss = 6.1761e-02, PNorm = 66.1688, GNorm = 0.6090, lr_0 = 3.3794e-04
Loss = 6.0515e-02, PNorm = 66.1727, GNorm = 0.4449, lr_0 = 3.3771e-04
Loss = 6.6576e-02, PNorm = 66.1772, GNorm = 0.6052, lr_0 = 3.3748e-04
Loss = 6.9590e-02, PNorm = 66.1845, GNorm = 0.6021, lr_0 = 3.3725e-04
Loss = 6.3895e-02, PNorm = 66.1914, GNorm = 0.3915, lr_0 = 3.3701e-04
Loss = 5.0375e-02, PNorm = 66.1965, GNorm = 0.4152, lr_0 = 3.3678e-04
Loss = 6.7047e-02, PNorm = 66.2045, GNorm = 0.5006, lr_0 = 3.3655e-04
Loss = 5.6375e-02, PNorm = 66.2083, GNorm = 0.7306, lr_0 = 3.3632e-04
Loss = 6.3502e-02, PNorm = 66.2137, GNorm = 0.6167, lr_0 = 3.3609e-04
Loss = 7.2804e-02, PNorm = 66.2214, GNorm = 0.7894, lr_0 = 3.3586e-04
Loss = 6.7740e-02, PNorm = 66.2260, GNorm = 0.3678, lr_0 = 3.3563e-04
Loss = 7.2461e-02, PNorm = 66.2326, GNorm = 0.9705, lr_0 = 3.3540e-04
Loss = 6.4414e-02, PNorm = 66.2404, GNorm = 0.9246, lr_0 = 3.3517e-04
Loss = 6.3381e-02, PNorm = 66.2487, GNorm = 0.5260, lr_0 = 3.3494e-04
Loss = 6.3853e-02, PNorm = 66.2547, GNorm = 0.6115, lr_0 = 3.3471e-04
Loss = 7.1713e-02, PNorm = 66.2625, GNorm = 0.6006, lr_0 = 3.3448e-04
Loss = 6.5649e-02, PNorm = 66.2720, GNorm = 0.4143, lr_0 = 3.3425e-04
Loss = 7.1180e-02, PNorm = 66.2786, GNorm = 0.6329, lr_0 = 3.3403e-04
Loss = 5.3181e-02, PNorm = 66.2838, GNorm = 0.4064, lr_0 = 3.3380e-04
Loss = 5.2807e-02, PNorm = 66.2909, GNorm = 0.5204, lr_0 = 3.3357e-04
Loss = 6.5545e-02, PNorm = 66.2990, GNorm = 0.5114, lr_0 = 3.3334e-04
Loss = 7.7606e-02, PNorm = 66.3061, GNorm = 0.6081, lr_0 = 3.3311e-04
Loss = 6.9088e-02, PNorm = 66.3127, GNorm = 0.6227, lr_0 = 3.3288e-04
Loss = 6.4715e-02, PNorm = 66.3176, GNorm = 0.5624, lr_0 = 3.3265e-04
Loss = 6.2057e-02, PNorm = 66.3221, GNorm = 0.5438, lr_0 = 3.3243e-04
Loss = 6.6665e-02, PNorm = 66.3275, GNorm = 0.6506, lr_0 = 3.3220e-04
Loss = 5.9587e-02, PNorm = 66.3357, GNorm = 0.5496, lr_0 = 3.3197e-04
Loss = 5.6750e-02, PNorm = 66.3445, GNorm = 0.5051, lr_0 = 3.3174e-04
Loss = 5.9879e-02, PNorm = 66.3515, GNorm = 0.4237, lr_0 = 3.3152e-04
Loss = 6.3840e-02, PNorm = 66.3618, GNorm = 0.6233, lr_0 = 3.3129e-04
Loss = 6.2668e-02, PNorm = 66.3696, GNorm = 0.4829, lr_0 = 3.3106e-04
Loss = 6.9574e-02, PNorm = 66.3771, GNorm = 0.9299, lr_0 = 3.3084e-04
Loss = 6.8486e-02, PNorm = 66.3852, GNorm = 1.4233, lr_0 = 3.3061e-04
Loss = 5.8048e-02, PNorm = 66.3945, GNorm = 0.5387, lr_0 = 3.3038e-04
Loss = 7.1007e-02, PNorm = 66.4056, GNorm = 0.3698, lr_0 = 3.3016e-04
Loss = 5.8361e-02, PNorm = 66.4128, GNorm = 0.5218, lr_0 = 3.2993e-04
Loss = 6.5687e-02, PNorm = 66.4201, GNorm = 0.6370, lr_0 = 3.2970e-04
Loss = 7.2507e-02, PNorm = 66.4265, GNorm = 0.6092, lr_0 = 3.2948e-04
Loss = 6.6783e-02, PNorm = 66.4331, GNorm = 0.6644, lr_0 = 3.2925e-04
Loss = 6.4017e-02, PNorm = 66.4363, GNorm = 0.6609, lr_0 = 3.2903e-04
Loss = 6.5746e-02, PNorm = 66.4403, GNorm = 0.7418, lr_0 = 3.2880e-04
Loss = 6.9640e-02, PNorm = 66.4471, GNorm = 1.3453, lr_0 = 3.2858e-04
Loss = 7.1805e-02, PNorm = 66.4567, GNorm = 0.4372, lr_0 = 3.2835e-04
Loss = 6.9510e-02, PNorm = 66.4636, GNorm = 0.5904, lr_0 = 3.2813e-04
Loss = 7.1266e-02, PNorm = 66.4679, GNorm = 0.6498, lr_0 = 3.2790e-04
Loss = 7.7801e-02, PNorm = 66.4752, GNorm = 0.4953, lr_0 = 3.2768e-04
Loss = 6.3463e-02, PNorm = 66.4826, GNorm = 0.5571, lr_0 = 3.2745e-04
Loss = 6.8538e-02, PNorm = 66.4895, GNorm = 0.4751, lr_0 = 3.2723e-04
Loss = 6.2959e-02, PNorm = 66.4970, GNorm = 0.5285, lr_0 = 3.2700e-04
Loss = 6.4892e-02, PNorm = 66.5033, GNorm = 0.6461, lr_0 = 3.2678e-04
Loss = 6.5415e-02, PNorm = 66.5095, GNorm = 0.6960, lr_0 = 3.2656e-04
Loss = 6.1119e-02, PNorm = 66.5191, GNorm = 0.6987, lr_0 = 3.2633e-04
Loss = 5.8723e-02, PNorm = 66.5243, GNorm = 0.4232, lr_0 = 3.2611e-04
Loss = 6.6508e-02, PNorm = 66.5288, GNorm = 0.6341, lr_0 = 3.2589e-04
Loss = 7.9263e-02, PNorm = 66.5373, GNorm = 1.2899, lr_0 = 3.2566e-04
Loss = 6.7551e-02, PNorm = 66.5465, GNorm = 0.7007, lr_0 = 3.2544e-04
Loss = 7.4840e-02, PNorm = 66.5521, GNorm = 0.4754, lr_0 = 3.2522e-04
Loss = 5.5615e-02, PNorm = 66.5562, GNorm = 0.3853, lr_0 = 3.2499e-04
Loss = 6.6322e-02, PNorm = 66.5645, GNorm = 0.8473, lr_0 = 3.2477e-04
Loss = 5.8055e-02, PNorm = 66.5702, GNorm = 0.5478, lr_0 = 3.2455e-04
Loss = 7.6929e-02, PNorm = 66.5760, GNorm = 0.6228, lr_0 = 3.2433e-04
Loss = 5.8412e-02, PNorm = 66.5810, GNorm = 0.5892, lr_0 = 3.2410e-04
Loss = 6.3324e-02, PNorm = 66.5865, GNorm = 0.6627, lr_0 = 3.2388e-04
Loss = 6.1677e-02, PNorm = 66.5953, GNorm = 0.8370, lr_0 = 3.2366e-04
Loss = 6.6156e-02, PNorm = 66.6033, GNorm = 0.4888, lr_0 = 3.2344e-04
Loss = 6.8921e-02, PNorm = 66.6084, GNorm = 1.0322, lr_0 = 3.2322e-04
Loss = 6.5364e-02, PNorm = 66.6145, GNorm = 1.1053, lr_0 = 3.2300e-04
Loss = 6.4559e-02, PNorm = 66.6197, GNorm = 0.4866, lr_0 = 3.2277e-04
Loss = 5.9199e-02, PNorm = 66.6242, GNorm = 0.6719, lr_0 = 3.2255e-04
Loss = 6.0631e-02, PNorm = 66.6293, GNorm = 0.4008, lr_0 = 3.2233e-04
Loss = 7.2596e-02, PNorm = 66.6362, GNorm = 0.5932, lr_0 = 3.2211e-04
Loss = 5.9053e-02, PNorm = 66.6416, GNorm = 0.5093, lr_0 = 3.2189e-04
Loss = 5.9363e-02, PNorm = 66.6459, GNorm = 0.6305, lr_0 = 3.2167e-04
Loss = 6.1060e-02, PNorm = 66.6524, GNorm = 0.4903, lr_0 = 3.2145e-04
Loss = 7.0631e-02, PNorm = 66.6579, GNorm = 0.6100, lr_0 = 3.2123e-04
Loss = 6.4237e-02, PNorm = 66.6622, GNorm = 0.7145, lr_0 = 3.2101e-04
Loss = 6.5024e-02, PNorm = 66.6682, GNorm = 0.6633, lr_0 = 3.2079e-04
Loss = 6.6974e-02, PNorm = 66.6781, GNorm = 0.4342, lr_0 = 3.2057e-04
Loss = 7.2148e-02, PNorm = 66.6866, GNorm = 0.8073, lr_0 = 3.2035e-04
Loss = 6.3108e-02, PNorm = 66.6917, GNorm = 0.4852, lr_0 = 3.2013e-04
Loss = 6.4506e-02, PNorm = 66.6949, GNorm = 0.9421, lr_0 = 3.1991e-04
Loss = 6.3715e-02, PNorm = 66.6987, GNorm = 0.5821, lr_0 = 3.1969e-04
Loss = 6.1005e-02, PNorm = 66.7043, GNorm = 0.5135, lr_0 = 3.1947e-04
Loss = 6.8183e-02, PNorm = 66.7125, GNorm = 1.0192, lr_0 = 3.1925e-04
Loss = 6.1833e-02, PNorm = 66.7195, GNorm = 0.7443, lr_0 = 3.1904e-04
Loss = 6.2350e-02, PNorm = 66.7256, GNorm = 0.5397, lr_0 = 3.1882e-04
Loss = 6.7155e-02, PNorm = 66.7330, GNorm = 0.6427, lr_0 = 3.1860e-04
Loss = 7.1315e-02, PNorm = 66.7401, GNorm = 0.6513, lr_0 = 3.1838e-04
Loss = 6.5838e-02, PNorm = 66.7438, GNorm = 0.5560, lr_0 = 3.1816e-04
Loss = 6.8021e-02, PNorm = 66.7489, GNorm = 0.9777, lr_0 = 3.1794e-04
Loss = 6.6563e-02, PNorm = 66.7544, GNorm = 0.7146, lr_0 = 3.1773e-04
Loss = 7.2992e-02, PNorm = 66.7615, GNorm = 0.4658, lr_0 = 3.1751e-04
Loss = 6.9453e-02, PNorm = 66.7683, GNorm = 0.5189, lr_0 = 3.1729e-04
Loss = 6.2509e-02, PNorm = 66.7740, GNorm = 0.3982, lr_0 = 3.1707e-04
Loss = 6.8638e-02, PNorm = 66.7812, GNorm = 0.7509, lr_0 = 3.1686e-04
Loss = 6.4597e-02, PNorm = 66.7894, GNorm = 0.8600, lr_0 = 3.1664e-04
Loss = 7.6844e-02, PNorm = 66.7965, GNorm = 0.9773, lr_0 = 3.1642e-04
Loss = 6.0506e-02, PNorm = 66.8002, GNorm = 0.5166, lr_0 = 3.1621e-04
Validation mae = 0.395730
Epoch 16
Loss = 5.6370e-02, PNorm = 66.8075, GNorm = 0.3830, lr_0 = 3.1599e-04
Loss = 5.9421e-02, PNorm = 66.8150, GNorm = 0.6693, lr_0 = 3.1577e-04
Loss = 6.1965e-02, PNorm = 66.8237, GNorm = 0.5033, lr_0 = 3.1556e-04
Loss = 6.3305e-02, PNorm = 66.8332, GNorm = 0.6837, lr_0 = 3.1534e-04
Loss = 6.0913e-02, PNorm = 66.8386, GNorm = 0.7850, lr_0 = 3.1512e-04
Loss = 6.0606e-02, PNorm = 66.8420, GNorm = 0.4497, lr_0 = 3.1491e-04
Loss = 6.3975e-02, PNorm = 66.8468, GNorm = 0.6632, lr_0 = 3.1469e-04
Loss = 6.6014e-02, PNorm = 66.8562, GNorm = 0.6180, lr_0 = 3.1448e-04
Loss = 5.7063e-02, PNorm = 66.8645, GNorm = 0.7880, lr_0 = 3.1426e-04
Loss = 6.5443e-02, PNorm = 66.8721, GNorm = 0.8084, lr_0 = 3.1405e-04
Loss = 6.5690e-02, PNorm = 66.8806, GNorm = 0.4974, lr_0 = 3.1383e-04
Loss = 6.1528e-02, PNorm = 66.8879, GNorm = 0.5990, lr_0 = 3.1362e-04
Loss = 5.3556e-02, PNorm = 66.8952, GNorm = 0.4780, lr_0 = 3.1340e-04
Loss = 5.2651e-02, PNorm = 66.9031, GNorm = 0.4177, lr_0 = 3.1319e-04
Loss = 5.3694e-02, PNorm = 66.9099, GNorm = 0.4787, lr_0 = 3.1297e-04
Loss = 5.1492e-02, PNorm = 66.9117, GNorm = 0.4770, lr_0 = 3.1276e-04
Loss = 5.7481e-02, PNorm = 66.9141, GNorm = 0.6190, lr_0 = 3.1254e-04
Loss = 4.8423e-02, PNorm = 66.9195, GNorm = 0.5036, lr_0 = 3.1233e-04
Loss = 5.7030e-02, PNorm = 66.9257, GNorm = 0.5027, lr_0 = 3.1212e-04
Loss = 6.7484e-02, PNorm = 66.9302, GNorm = 1.0635, lr_0 = 3.1190e-04
Loss = 5.6842e-02, PNorm = 66.9393, GNorm = 0.6226, lr_0 = 3.1169e-04
Loss = 6.5478e-02, PNorm = 66.9452, GNorm = 0.5573, lr_0 = 3.1147e-04
Loss = 5.8537e-02, PNorm = 66.9498, GNorm = 0.5683, lr_0 = 3.1126e-04
Loss = 5.6894e-02, PNorm = 66.9545, GNorm = 0.5312, lr_0 = 3.1105e-04
Loss = 6.0743e-02, PNorm = 66.9618, GNorm = 0.7400, lr_0 = 3.1083e-04
Loss = 6.0271e-02, PNorm = 66.9707, GNorm = 0.8473, lr_0 = 3.1062e-04
Loss = 5.2371e-02, PNorm = 66.9802, GNorm = 0.4074, lr_0 = 3.1041e-04
Loss = 6.5063e-02, PNorm = 66.9868, GNorm = 0.8775, lr_0 = 3.1020e-04
Loss = 5.7994e-02, PNorm = 66.9917, GNorm = 1.1457, lr_0 = 3.0998e-04
Loss = 6.2562e-02, PNorm = 66.9981, GNorm = 0.7689, lr_0 = 3.0977e-04
Loss = 6.1413e-02, PNorm = 67.0060, GNorm = 0.6748, lr_0 = 3.0956e-04
Loss = 6.3669e-02, PNorm = 67.0104, GNorm = 0.6081, lr_0 = 3.0935e-04
Loss = 6.3378e-02, PNorm = 67.0149, GNorm = 0.4364, lr_0 = 3.0914e-04
Loss = 6.3017e-02, PNorm = 67.0216, GNorm = 0.7555, lr_0 = 3.0892e-04
Loss = 5.9177e-02, PNorm = 67.0297, GNorm = 0.4745, lr_0 = 3.0871e-04
Loss = 5.3519e-02, PNorm = 67.0356, GNorm = 0.9876, lr_0 = 3.0850e-04
Loss = 5.6612e-02, PNorm = 67.0418, GNorm = 0.4814, lr_0 = 3.0829e-04
Loss = 5.7849e-02, PNorm = 67.0481, GNorm = 0.4542, lr_0 = 3.0808e-04
Loss = 5.7812e-02, PNorm = 67.0527, GNorm = 0.4581, lr_0 = 3.0787e-04
Loss = 6.2565e-02, PNorm = 67.0604, GNorm = 0.6551, lr_0 = 3.0766e-04
Loss = 5.6766e-02, PNorm = 67.0675, GNorm = 0.5554, lr_0 = 3.0745e-04
Loss = 5.2457e-02, PNorm = 67.0737, GNorm = 0.5261, lr_0 = 3.0723e-04
Loss = 6.2888e-02, PNorm = 67.0785, GNorm = 0.4468, lr_0 = 3.0702e-04
Loss = 5.0106e-02, PNorm = 67.0817, GNorm = 0.4895, lr_0 = 3.0681e-04
Loss = 5.8682e-02, PNorm = 67.0868, GNorm = 0.6840, lr_0 = 3.0660e-04
Loss = 5.3661e-02, PNorm = 67.0914, GNorm = 0.6818, lr_0 = 3.0639e-04
Loss = 5.5319e-02, PNorm = 67.0942, GNorm = 0.4597, lr_0 = 3.0618e-04
Loss = 5.6187e-02, PNorm = 67.0999, GNorm = 0.4201, lr_0 = 3.0597e-04
Loss = 5.6095e-02, PNorm = 67.1063, GNorm = 0.3981, lr_0 = 3.0576e-04
Loss = 6.2820e-02, PNorm = 67.1126, GNorm = 0.9455, lr_0 = 3.0555e-04
Loss = 6.3057e-02, PNorm = 67.1191, GNorm = 0.7002, lr_0 = 3.0535e-04
Loss = 5.1177e-02, PNorm = 67.1230, GNorm = 0.7981, lr_0 = 3.0514e-04
Loss = 5.4559e-02, PNorm = 67.1278, GNorm = 1.0814, lr_0 = 3.0493e-04
Loss = 6.1092e-02, PNorm = 67.1320, GNorm = 0.4956, lr_0 = 3.0472e-04
Loss = 5.6334e-02, PNorm = 67.1344, GNorm = 0.5547, lr_0 = 3.0451e-04
Loss = 5.3447e-02, PNorm = 67.1392, GNorm = 0.5690, lr_0 = 3.0430e-04
Loss = 6.7185e-02, PNorm = 67.1482, GNorm = 0.6425, lr_0 = 3.0409e-04
Loss = 6.8276e-02, PNorm = 67.1559, GNorm = 0.4650, lr_0 = 3.0388e-04
Loss = 6.1825e-02, PNorm = 67.1618, GNorm = 0.5361, lr_0 = 3.0368e-04
Loss = 6.7963e-02, PNorm = 67.1662, GNorm = 0.6835, lr_0 = 3.0347e-04
Loss = 7.2341e-02, PNorm = 67.1709, GNorm = 0.7441, lr_0 = 3.0326e-04
Loss = 5.7868e-02, PNorm = 67.1777, GNorm = 0.7414, lr_0 = 3.0305e-04
Loss = 6.0789e-02, PNorm = 67.1868, GNorm = 0.5676, lr_0 = 3.0284e-04
Loss = 7.0177e-02, PNorm = 67.1927, GNorm = 0.5765, lr_0 = 3.0264e-04
Loss = 5.8815e-02, PNorm = 67.1976, GNorm = 0.5147, lr_0 = 3.0243e-04
Loss = 4.7546e-02, PNorm = 67.2040, GNorm = 0.5759, lr_0 = 3.0222e-04
Loss = 6.1179e-02, PNorm = 67.2080, GNorm = 1.0923, lr_0 = 3.0202e-04
Loss = 5.7095e-02, PNorm = 67.2124, GNorm = 0.6422, lr_0 = 3.0181e-04
Loss = 5.3426e-02, PNorm = 67.2169, GNorm = 0.7054, lr_0 = 3.0160e-04
Loss = 6.9805e-02, PNorm = 67.2218, GNorm = 0.6168, lr_0 = 3.0140e-04
Loss = 5.8760e-02, PNorm = 67.2282, GNorm = 0.4154, lr_0 = 3.0119e-04
Loss = 6.0314e-02, PNorm = 67.2346, GNorm = 0.4705, lr_0 = 3.0098e-04
Loss = 5.9498e-02, PNorm = 67.2399, GNorm = 0.5597, lr_0 = 3.0078e-04
Loss = 5.9766e-02, PNorm = 67.2467, GNorm = 0.4699, lr_0 = 3.0057e-04
Loss = 6.0876e-02, PNorm = 67.2510, GNorm = 0.6050, lr_0 = 3.0036e-04
Loss = 6.1341e-02, PNorm = 67.2544, GNorm = 0.4728, lr_0 = 3.0016e-04
Loss = 6.9993e-02, PNorm = 67.2618, GNorm = 1.3194, lr_0 = 2.9995e-04
Loss = 5.7884e-02, PNorm = 67.2687, GNorm = 0.5363, lr_0 = 2.9975e-04
Loss = 5.8794e-02, PNorm = 67.2751, GNorm = 0.8772, lr_0 = 2.9954e-04
Loss = 6.3582e-02, PNorm = 67.2799, GNorm = 0.4844, lr_0 = 2.9934e-04
Loss = 8.1836e-02, PNorm = 67.2840, GNorm = 0.5563, lr_0 = 2.9913e-04
Loss = 5.8774e-02, PNorm = 67.2918, GNorm = 0.7659, lr_0 = 2.9893e-04
Loss = 5.8490e-02, PNorm = 67.2991, GNorm = 0.5449, lr_0 = 2.9872e-04
Loss = 6.1407e-02, PNorm = 67.3076, GNorm = 0.7395, lr_0 = 2.9852e-04
Loss = 6.0015e-02, PNorm = 67.3122, GNorm = 0.6278, lr_0 = 2.9831e-04
Loss = 6.4530e-02, PNorm = 67.3195, GNorm = 0.5805, lr_0 = 2.9811e-04
Loss = 5.7003e-02, PNorm = 67.3263, GNorm = 0.5048, lr_0 = 2.9790e-04
Loss = 5.4840e-02, PNorm = 67.3296, GNorm = 0.4391, lr_0 = 2.9770e-04
Loss = 6.0860e-02, PNorm = 67.3324, GNorm = 0.8107, lr_0 = 2.9750e-04
Loss = 6.3033e-02, PNorm = 67.3369, GNorm = 0.7796, lr_0 = 2.9729e-04
Loss = 7.1614e-02, PNorm = 67.3411, GNorm = 0.7081, lr_0 = 2.9709e-04
Loss = 6.2418e-02, PNorm = 67.3472, GNorm = 0.6680, lr_0 = 2.9689e-04
Loss = 6.1934e-02, PNorm = 67.3542, GNorm = 0.5064, lr_0 = 2.9668e-04
Loss = 5.6262e-02, PNorm = 67.3604, GNorm = 0.6221, lr_0 = 2.9648e-04
Loss = 6.3255e-02, PNorm = 67.3677, GNorm = 0.4937, lr_0 = 2.9628e-04
Loss = 6.9184e-02, PNorm = 67.3738, GNorm = 0.9643, lr_0 = 2.9607e-04
Loss = 6.4453e-02, PNorm = 67.3800, GNorm = 0.5031, lr_0 = 2.9587e-04
Loss = 5.8475e-02, PNorm = 67.3856, GNorm = 0.6436, lr_0 = 2.9567e-04
Loss = 5.6729e-02, PNorm = 67.3904, GNorm = 0.4855, lr_0 = 2.9546e-04
Loss = 5.9226e-02, PNorm = 67.3954, GNorm = 0.4197, lr_0 = 2.9526e-04
Loss = 5.6125e-02, PNorm = 67.4008, GNorm = 0.4559, lr_0 = 2.9506e-04
Loss = 6.3179e-02, PNorm = 67.4069, GNorm = 0.4336, lr_0 = 2.9486e-04
Loss = 6.7075e-02, PNorm = 67.4128, GNorm = 0.9992, lr_0 = 2.9466e-04
Loss = 7.6105e-02, PNorm = 67.4208, GNorm = 0.7333, lr_0 = 2.9445e-04
Loss = 5.9829e-02, PNorm = 67.4267, GNorm = 0.5389, lr_0 = 2.9425e-04
Loss = 7.2441e-02, PNorm = 67.4318, GNorm = 0.6441, lr_0 = 2.9405e-04
Loss = 6.0022e-02, PNorm = 67.4371, GNorm = 0.5802, lr_0 = 2.9385e-04
Loss = 5.8745e-02, PNorm = 67.4414, GNorm = 0.8249, lr_0 = 2.9365e-04
Loss = 6.2421e-02, PNorm = 67.4456, GNorm = 1.3561, lr_0 = 2.9345e-04
Loss = 5.4281e-02, PNorm = 67.4499, GNorm = 0.3980, lr_0 = 2.9325e-04
Loss = 6.7433e-02, PNorm = 67.4565, GNorm = 0.5618, lr_0 = 2.9305e-04
Loss = 7.8027e-02, PNorm = 67.4651, GNorm = 0.5427, lr_0 = 2.9284e-04
Loss = 5.5928e-02, PNorm = 67.4708, GNorm = 0.3867, lr_0 = 2.9264e-04
Loss = 5.7025e-02, PNorm = 67.4736, GNorm = 0.9939, lr_0 = 2.9244e-04
Loss = 5.5663e-02, PNorm = 67.4744, GNorm = 0.4167, lr_0 = 2.9224e-04
Loss = 6.4766e-02, PNorm = 67.4775, GNorm = 0.6064, lr_0 = 2.9204e-04
Loss = 6.5379e-02, PNorm = 67.4817, GNorm = 0.7061, lr_0 = 2.9184e-04
Loss = 6.3210e-02, PNorm = 67.4850, GNorm = 0.5634, lr_0 = 2.9164e-04
Loss = 6.3889e-02, PNorm = 67.4904, GNorm = 0.5709, lr_0 = 2.9144e-04
Loss = 6.1316e-02, PNorm = 67.4978, GNorm = 1.0780, lr_0 = 2.9124e-04
Validation mae = 0.390632
Epoch 17
Loss = 6.1685e-02, PNorm = 67.5043, GNorm = 0.5635, lr_0 = 2.9104e-04
Loss = 5.3750e-02, PNorm = 67.5118, GNorm = 0.4239, lr_0 = 2.9084e-04
Loss = 5.6678e-02, PNorm = 67.5188, GNorm = 0.5445, lr_0 = 2.9065e-04
Loss = 5.6649e-02, PNorm = 67.5253, GNorm = 0.4950, lr_0 = 2.9045e-04
Loss = 5.9636e-02, PNorm = 67.5352, GNorm = 0.4620, lr_0 = 2.9025e-04
Loss = 5.8190e-02, PNorm = 67.5445, GNorm = 0.4252, lr_0 = 2.9005e-04
Loss = 5.4862e-02, PNorm = 67.5524, GNorm = 0.6492, lr_0 = 2.8985e-04
Loss = 6.9870e-02, PNorm = 67.5582, GNorm = 0.7388, lr_0 = 2.8965e-04
Loss = 5.0772e-02, PNorm = 67.5644, GNorm = 0.4961, lr_0 = 2.8945e-04
Loss = 5.9835e-02, PNorm = 67.5693, GNorm = 0.6865, lr_0 = 2.8925e-04
Loss = 5.8158e-02, PNorm = 67.5765, GNorm = 0.6391, lr_0 = 2.8906e-04
Loss = 5.2309e-02, PNorm = 67.5851, GNorm = 0.6119, lr_0 = 2.8886e-04
Loss = 5.8191e-02, PNorm = 67.5923, GNorm = 0.6466, lr_0 = 2.8866e-04
Loss = 5.6120e-02, PNorm = 67.5978, GNorm = 0.5285, lr_0 = 2.8846e-04
Loss = 5.4730e-02, PNorm = 67.6038, GNorm = 0.7347, lr_0 = 2.8826e-04
Loss = 5.7159e-02, PNorm = 67.6075, GNorm = 0.3874, lr_0 = 2.8807e-04
Loss = 4.2566e-02, PNorm = 67.6138, GNorm = 0.5666, lr_0 = 2.8787e-04
Loss = 5.2485e-02, PNorm = 67.6193, GNorm = 0.3420, lr_0 = 2.8767e-04
Loss = 5.2978e-02, PNorm = 67.6231, GNorm = 0.4447, lr_0 = 2.8748e-04
Loss = 5.1336e-02, PNorm = 67.6258, GNorm = 0.5239, lr_0 = 2.8728e-04
Loss = 5.8513e-02, PNorm = 67.6270, GNorm = 0.5163, lr_0 = 2.8708e-04
Loss = 5.8033e-02, PNorm = 67.6338, GNorm = 0.6386, lr_0 = 2.8689e-04
Loss = 5.2602e-02, PNorm = 67.6408, GNorm = 0.5261, lr_0 = 2.8669e-04
Loss = 5.0202e-02, PNorm = 67.6443, GNorm = 0.6765, lr_0 = 2.8649e-04
Loss = 5.1365e-02, PNorm = 67.6486, GNorm = 0.6479, lr_0 = 2.8630e-04
Loss = 5.3386e-02, PNorm = 67.6546, GNorm = 0.6016, lr_0 = 2.8610e-04
Loss = 6.2550e-02, PNorm = 67.6593, GNorm = 0.6214, lr_0 = 2.8590e-04
Loss = 5.5103e-02, PNorm = 67.6618, GNorm = 0.6994, lr_0 = 2.8571e-04
Loss = 5.7914e-02, PNorm = 67.6673, GNorm = 0.5337, lr_0 = 2.8551e-04
Loss = 5.4310e-02, PNorm = 67.6725, GNorm = 0.5030, lr_0 = 2.8532e-04
Loss = 5.1297e-02, PNorm = 67.6767, GNorm = 0.7556, lr_0 = 2.8512e-04
Loss = 6.2985e-02, PNorm = 67.6847, GNorm = 1.4414, lr_0 = 2.8493e-04
Loss = 6.4865e-02, PNorm = 67.6917, GNorm = 0.5156, lr_0 = 2.8473e-04
Loss = 5.6407e-02, PNorm = 67.6974, GNorm = 0.3892, lr_0 = 2.8454e-04
Loss = 6.1110e-02, PNorm = 67.7040, GNorm = 0.5968, lr_0 = 2.8434e-04
Loss = 5.7076e-02, PNorm = 67.7095, GNorm = 0.5061, lr_0 = 2.8415e-04
Loss = 5.3646e-02, PNorm = 67.7170, GNorm = 0.3913, lr_0 = 2.8395e-04
Loss = 5.1953e-02, PNorm = 67.7221, GNorm = 0.5911, lr_0 = 2.8376e-04
Loss = 4.7037e-02, PNorm = 67.7272, GNorm = 0.4789, lr_0 = 2.8356e-04
Loss = 5.1881e-02, PNorm = 67.7306, GNorm = 0.5057, lr_0 = 2.8337e-04
Loss = 5.8449e-02, PNorm = 67.7341, GNorm = 0.7295, lr_0 = 2.8317e-04
Loss = 5.5832e-02, PNorm = 67.7381, GNorm = 0.6701, lr_0 = 2.8298e-04
Loss = 4.9898e-02, PNorm = 67.7439, GNorm = 0.4588, lr_0 = 2.8279e-04
Loss = 6.0671e-02, PNorm = 67.7508, GNorm = 0.6525, lr_0 = 2.8259e-04
Loss = 5.5301e-02, PNorm = 67.7550, GNorm = 0.6109, lr_0 = 2.8240e-04
Loss = 6.1507e-02, PNorm = 67.7582, GNorm = 0.5325, lr_0 = 2.8221e-04
Loss = 6.5197e-02, PNorm = 67.7648, GNorm = 0.5088, lr_0 = 2.8201e-04
Loss = 6.4311e-02, PNorm = 67.7741, GNorm = 0.9252, lr_0 = 2.8182e-04
Loss = 5.0031e-02, PNorm = 67.7838, GNorm = 0.4725, lr_0 = 2.8163e-04
Loss = 4.9673e-02, PNorm = 67.7898, GNorm = 0.3675, lr_0 = 2.8143e-04
Loss = 4.9915e-02, PNorm = 67.7934, GNorm = 0.5181, lr_0 = 2.8124e-04
Loss = 5.4897e-02, PNorm = 67.7966, GNorm = 0.8006, lr_0 = 2.8105e-04
Loss = 5.7552e-02, PNorm = 67.8007, GNorm = 0.6883, lr_0 = 2.8085e-04
Loss = 6.0021e-02, PNorm = 67.8054, GNorm = 0.4978, lr_0 = 2.8066e-04
Loss = 5.6673e-02, PNorm = 67.8105, GNorm = 0.7115, lr_0 = 2.8047e-04
Loss = 5.4258e-02, PNorm = 67.8148, GNorm = 0.6194, lr_0 = 2.8028e-04
Loss = 5.4114e-02, PNorm = 67.8168, GNorm = 0.4337, lr_0 = 2.8009e-04
Loss = 6.2701e-02, PNorm = 67.8228, GNorm = 0.7194, lr_0 = 2.7989e-04
Loss = 5.4287e-02, PNorm = 67.8259, GNorm = 0.5685, lr_0 = 2.7970e-04
Loss = 4.8334e-02, PNorm = 67.8291, GNorm = 0.4846, lr_0 = 2.7951e-04
Loss = 4.5646e-02, PNorm = 67.8336, GNorm = 0.5205, lr_0 = 2.7932e-04
Loss = 5.1596e-02, PNorm = 67.8391, GNorm = 0.8323, lr_0 = 2.7913e-04
Loss = 6.2597e-02, PNorm = 67.8457, GNorm = 0.8157, lr_0 = 2.7894e-04
Loss = 6.1770e-02, PNorm = 67.8502, GNorm = 1.5791, lr_0 = 2.7875e-04
Loss = 6.3336e-02, PNorm = 67.8563, GNorm = 0.7053, lr_0 = 2.7855e-04
Loss = 5.9835e-02, PNorm = 67.8615, GNorm = 0.9969, lr_0 = 2.7836e-04
Loss = 5.4392e-02, PNorm = 67.8648, GNorm = 0.5513, lr_0 = 2.7817e-04
Loss = 5.6299e-02, PNorm = 67.8683, GNorm = 0.6236, lr_0 = 2.7798e-04
Loss = 6.0242e-02, PNorm = 67.8723, GNorm = 0.6370, lr_0 = 2.7779e-04
Loss = 5.9968e-02, PNorm = 67.8770, GNorm = 0.5195, lr_0 = 2.7760e-04
Loss = 5.6417e-02, PNorm = 67.8838, GNorm = 0.7620, lr_0 = 2.7741e-04
Loss = 5.1714e-02, PNorm = 67.8894, GNorm = 0.5122, lr_0 = 2.7722e-04
Loss = 6.0893e-02, PNorm = 67.8929, GNorm = 0.4914, lr_0 = 2.7703e-04
Loss = 5.6399e-02, PNorm = 67.8960, GNorm = 0.6104, lr_0 = 2.7684e-04
Loss = 5.5337e-02, PNorm = 67.9007, GNorm = 0.5006, lr_0 = 2.7665e-04
Loss = 6.8024e-02, PNorm = 67.9071, GNorm = 0.7211, lr_0 = 2.7646e-04
Loss = 6.4478e-02, PNorm = 67.9118, GNorm = 0.7416, lr_0 = 2.7627e-04
Loss = 5.8436e-02, PNorm = 67.9171, GNorm = 0.5814, lr_0 = 2.7608e-04
Loss = 5.4771e-02, PNorm = 67.9246, GNorm = 0.9071, lr_0 = 2.7590e-04
Loss = 6.4349e-02, PNorm = 67.9332, GNorm = 0.8237, lr_0 = 2.7571e-04
Loss = 4.9915e-02, PNorm = 67.9395, GNorm = 0.4348, lr_0 = 2.7552e-04
Loss = 6.3532e-02, PNorm = 67.9430, GNorm = 0.5539, lr_0 = 2.7533e-04
Loss = 6.2149e-02, PNorm = 67.9498, GNorm = 0.4408, lr_0 = 2.7514e-04
Loss = 5.1276e-02, PNorm = 67.9551, GNorm = 0.9484, lr_0 = 2.7495e-04
Loss = 6.0774e-02, PNorm = 67.9601, GNorm = 0.4267, lr_0 = 2.7476e-04
Loss = 5.5595e-02, PNorm = 67.9639, GNorm = 0.4704, lr_0 = 2.7457e-04
Loss = 5.6316e-02, PNorm = 67.9673, GNorm = 0.4386, lr_0 = 2.7439e-04
Loss = 5.1733e-02, PNorm = 67.9717, GNorm = 0.4639, lr_0 = 2.7420e-04
Loss = 5.0394e-02, PNorm = 67.9772, GNorm = 0.5967, lr_0 = 2.7401e-04
Loss = 5.8987e-02, PNorm = 67.9850, GNorm = 0.4450, lr_0 = 2.7382e-04
Loss = 6.6608e-02, PNorm = 67.9899, GNorm = 0.5862, lr_0 = 2.7364e-04
Loss = 5.8793e-02, PNorm = 67.9911, GNorm = 0.5944, lr_0 = 2.7345e-04
Loss = 5.3136e-02, PNorm = 67.9920, GNorm = 0.5042, lr_0 = 2.7326e-04
Loss = 6.0413e-02, PNorm = 67.9950, GNorm = 0.6201, lr_0 = 2.7307e-04
Loss = 5.7634e-02, PNorm = 67.9990, GNorm = 0.7018, lr_0 = 2.7289e-04
Loss = 5.3750e-02, PNorm = 68.0031, GNorm = 0.4707, lr_0 = 2.7270e-04
Loss = 5.4336e-02, PNorm = 68.0062, GNorm = 0.4791, lr_0 = 2.7251e-04
Loss = 5.2783e-02, PNorm = 68.0126, GNorm = 0.7900, lr_0 = 2.7233e-04
Loss = 6.9212e-02, PNorm = 68.0201, GNorm = 0.7401, lr_0 = 2.7214e-04
Loss = 7.0168e-02, PNorm = 68.0227, GNorm = 0.6417, lr_0 = 2.7195e-04
Loss = 5.7233e-02, PNorm = 68.0265, GNorm = 0.4286, lr_0 = 2.7177e-04
Loss = 5.2243e-02, PNorm = 68.0299, GNorm = 0.5306, lr_0 = 2.7158e-04
Loss = 6.2185e-02, PNorm = 68.0350, GNorm = 0.4591, lr_0 = 2.7139e-04
Loss = 6.1001e-02, PNorm = 68.0390, GNorm = 0.8384, lr_0 = 2.7121e-04
Loss = 5.1013e-02, PNorm = 68.0430, GNorm = 0.9675, lr_0 = 2.7102e-04
Loss = 5.7162e-02, PNorm = 68.0458, GNorm = 0.6555, lr_0 = 2.7084e-04
Loss = 5.3647e-02, PNorm = 68.0494, GNorm = 0.3977, lr_0 = 2.7065e-04
Loss = 6.3661e-02, PNorm = 68.0537, GNorm = 0.6652, lr_0 = 2.7047e-04
Loss = 6.0467e-02, PNorm = 68.0570, GNorm = 0.5134, lr_0 = 2.7028e-04
Loss = 6.1615e-02, PNorm = 68.0599, GNorm = 0.4041, lr_0 = 2.7010e-04
Loss = 6.1535e-02, PNorm = 68.0629, GNorm = 0.5205, lr_0 = 2.6991e-04
Loss = 5.8049e-02, PNorm = 68.0664, GNorm = 0.4828, lr_0 = 2.6973e-04
Loss = 5.6316e-02, PNorm = 68.0719, GNorm = 0.5188, lr_0 = 2.6954e-04
Loss = 6.0362e-02, PNorm = 68.0788, GNorm = 1.0818, lr_0 = 2.6936e-04
Loss = 7.4003e-02, PNorm = 68.0855, GNorm = 0.7757, lr_0 = 2.6917e-04
Loss = 5.1762e-02, PNorm = 68.0905, GNorm = 0.4348, lr_0 = 2.6899e-04
Loss = 5.2684e-02, PNorm = 68.0943, GNorm = 0.5300, lr_0 = 2.6880e-04
Loss = 6.6130e-02, PNorm = 68.0992, GNorm = 0.6918, lr_0 = 2.6862e-04
Loss = 5.6020e-02, PNorm = 68.1066, GNorm = 0.7369, lr_0 = 2.6844e-04
Loss = 5.8153e-02, PNorm = 68.1113, GNorm = 0.4868, lr_0 = 2.6825e-04
Validation mae = 0.387653
Epoch 18
Loss = 5.2874e-02, PNorm = 68.1179, GNorm = 0.7160, lr_0 = 2.6807e-04
Loss = 4.6504e-02, PNorm = 68.1241, GNorm = 0.4812, lr_0 = 2.6788e-04
Loss = 4.9655e-02, PNorm = 68.1290, GNorm = 0.8331, lr_0 = 2.6770e-04
Loss = 4.8063e-02, PNorm = 68.1346, GNorm = 0.5099, lr_0 = 2.6752e-04
Loss = 4.8128e-02, PNorm = 68.1406, GNorm = 0.4101, lr_0 = 2.6733e-04
Loss = 4.7553e-02, PNorm = 68.1451, GNorm = 0.3732, lr_0 = 2.6715e-04
Loss = 5.9111e-02, PNorm = 68.1487, GNorm = 0.6986, lr_0 = 2.6697e-04
Loss = 5.4534e-02, PNorm = 68.1540, GNorm = 0.5521, lr_0 = 2.6678e-04
Loss = 5.7322e-02, PNorm = 68.1608, GNorm = 0.6778, lr_0 = 2.6660e-04
Loss = 4.7735e-02, PNorm = 68.1662, GNorm = 0.5333, lr_0 = 2.6642e-04
Loss = 4.3100e-02, PNorm = 68.1707, GNorm = 0.5321, lr_0 = 2.6624e-04
Loss = 5.4652e-02, PNorm = 68.1766, GNorm = 0.3793, lr_0 = 2.6605e-04
Loss = 5.0035e-02, PNorm = 68.1819, GNorm = 0.4983, lr_0 = 2.6587e-04
Loss = 4.7047e-02, PNorm = 68.1860, GNorm = 0.4466, lr_0 = 2.6569e-04
Loss = 5.0480e-02, PNorm = 68.1880, GNorm = 0.7648, lr_0 = 2.6551e-04
Loss = 5.0828e-02, PNorm = 68.1905, GNorm = 0.5170, lr_0 = 2.6533e-04
Loss = 4.6397e-02, PNorm = 68.1925, GNorm = 0.6892, lr_0 = 2.6514e-04
Loss = 5.4475e-02, PNorm = 68.1946, GNorm = 0.4560, lr_0 = 2.6496e-04
Loss = 5.5130e-02, PNorm = 68.1993, GNorm = 0.5840, lr_0 = 2.6478e-04
Loss = 5.0979e-02, PNorm = 68.2054, GNorm = 0.5290, lr_0 = 2.6460e-04
Loss = 5.1091e-02, PNorm = 68.2098, GNorm = 0.5846, lr_0 = 2.6442e-04
Loss = 5.4656e-02, PNorm = 68.2115, GNorm = 0.8590, lr_0 = 2.6424e-04
Loss = 5.0873e-02, PNorm = 68.2156, GNorm = 0.6562, lr_0 = 2.6406e-04
Loss = 5.8224e-02, PNorm = 68.2213, GNorm = 0.9040, lr_0 = 2.6388e-04
Loss = 5.0087e-02, PNorm = 68.2265, GNorm = 0.6912, lr_0 = 2.6369e-04
Loss = 4.3450e-02, PNorm = 68.2317, GNorm = 0.4763, lr_0 = 2.6351e-04
Loss = 5.3741e-02, PNorm = 68.2364, GNorm = 0.7272, lr_0 = 2.6333e-04
Loss = 4.9282e-02, PNorm = 68.2417, GNorm = 0.5159, lr_0 = 2.6315e-04
Loss = 5.1526e-02, PNorm = 68.2460, GNorm = 0.5228, lr_0 = 2.6297e-04
Loss = 5.3907e-02, PNorm = 68.2487, GNorm = 0.4195, lr_0 = 2.6279e-04
Loss = 5.2249e-02, PNorm = 68.2513, GNorm = 0.5190, lr_0 = 2.6261e-04
Loss = 4.8538e-02, PNorm = 68.2558, GNorm = 0.4945, lr_0 = 2.6243e-04
Loss = 4.9467e-02, PNorm = 68.2614, GNorm = 0.5261, lr_0 = 2.6225e-04
Loss = 5.4080e-02, PNorm = 68.2650, GNorm = 0.8521, lr_0 = 2.6207e-04
Loss = 5.1871e-02, PNorm = 68.2689, GNorm = 0.4233, lr_0 = 2.6189e-04
Loss = 5.0746e-02, PNorm = 68.2745, GNorm = 0.6371, lr_0 = 2.6171e-04
Loss = 4.9770e-02, PNorm = 68.2798, GNorm = 0.3934, lr_0 = 2.6153e-04
Loss = 4.9605e-02, PNorm = 68.2829, GNorm = 0.5898, lr_0 = 2.6136e-04
Loss = 5.5841e-02, PNorm = 68.2860, GNorm = 0.6592, lr_0 = 2.6118e-04
Loss = 5.1103e-02, PNorm = 68.2911, GNorm = 0.4795, lr_0 = 2.6100e-04
Loss = 5.2033e-02, PNorm = 68.2971, GNorm = 0.5335, lr_0 = 2.6082e-04
Loss = 5.3038e-02, PNorm = 68.3039, GNorm = 0.5306, lr_0 = 2.6064e-04
Loss = 5.5575e-02, PNorm = 68.3098, GNorm = 0.4717, lr_0 = 2.6046e-04
Loss = 5.6325e-02, PNorm = 68.3149, GNorm = 0.4799, lr_0 = 2.6028e-04
Loss = 4.5010e-02, PNorm = 68.3209, GNorm = 0.6964, lr_0 = 2.6011e-04
Loss = 7.0373e-02, PNorm = 68.3260, GNorm = 0.5640, lr_0 = 2.5993e-04
Loss = 5.4362e-02, PNorm = 68.3312, GNorm = 0.6410, lr_0 = 2.5975e-04
Loss = 5.0791e-02, PNorm = 68.3346, GNorm = 0.5367, lr_0 = 2.5957e-04
Loss = 4.6837e-02, PNorm = 68.3376, GNorm = 0.6128, lr_0 = 2.5939e-04
Loss = 5.2185e-02, PNorm = 68.3437, GNorm = 0.5236, lr_0 = 2.5922e-04
Loss = 5.4874e-02, PNorm = 68.3507, GNorm = 0.5502, lr_0 = 2.5904e-04
Loss = 5.2577e-02, PNorm = 68.3560, GNorm = 0.6739, lr_0 = 2.5886e-04
Loss = 4.8833e-02, PNorm = 68.3588, GNorm = 0.9075, lr_0 = 2.5868e-04
Loss = 5.4752e-02, PNorm = 68.3632, GNorm = 0.8420, lr_0 = 2.5851e-04
Loss = 4.9797e-02, PNorm = 68.3692, GNorm = 0.4943, lr_0 = 2.5833e-04
Loss = 5.2862e-02, PNorm = 68.3769, GNorm = 0.4705, lr_0 = 2.5815e-04
Loss = 5.0728e-02, PNorm = 68.3803, GNorm = 0.5442, lr_0 = 2.5797e-04
Loss = 5.1946e-02, PNorm = 68.3832, GNorm = 0.4699, lr_0 = 2.5780e-04
Loss = 4.9375e-02, PNorm = 68.3891, GNorm = 0.9530, lr_0 = 2.5762e-04
Loss = 4.7046e-02, PNorm = 68.3942, GNorm = 0.3905, lr_0 = 2.5745e-04
Loss = 4.9151e-02, PNorm = 68.3981, GNorm = 0.4600, lr_0 = 2.5727e-04
Loss = 5.8615e-02, PNorm = 68.4027, GNorm = 0.4510, lr_0 = 2.5709e-04
Loss = 5.8884e-02, PNorm = 68.4067, GNorm = 0.7876, lr_0 = 2.5692e-04
Loss = 5.6843e-02, PNorm = 68.4094, GNorm = 0.5697, lr_0 = 2.5674e-04
Loss = 5.5328e-02, PNorm = 68.4115, GNorm = 0.3873, lr_0 = 2.5656e-04
Loss = 6.1035e-02, PNorm = 68.4154, GNorm = 0.4303, lr_0 = 2.5639e-04
Loss = 5.1386e-02, PNorm = 68.4207, GNorm = 0.4469, lr_0 = 2.5621e-04
Loss = 5.1803e-02, PNorm = 68.4241, GNorm = 0.6189, lr_0 = 2.5604e-04
Loss = 6.1889e-02, PNorm = 68.4274, GNorm = 0.6160, lr_0 = 2.5586e-04
Loss = 5.3032e-02, PNorm = 68.4306, GNorm = 0.8006, lr_0 = 2.5569e-04
Loss = 5.1401e-02, PNorm = 68.4345, GNorm = 0.5517, lr_0 = 2.5551e-04
Loss = 5.9024e-02, PNorm = 68.4388, GNorm = 0.5361, lr_0 = 2.5534e-04
Loss = 5.2708e-02, PNorm = 68.4437, GNorm = 0.7079, lr_0 = 2.5516e-04
Loss = 4.7497e-02, PNorm = 68.4492, GNorm = 0.5567, lr_0 = 2.5499e-04
Loss = 5.3878e-02, PNorm = 68.4542, GNorm = 0.4099, lr_0 = 2.5481e-04
Loss = 5.8030e-02, PNorm = 68.4586, GNorm = 0.7376, lr_0 = 2.5464e-04
Loss = 4.7123e-02, PNorm = 68.4641, GNorm = 0.6001, lr_0 = 2.5446e-04
Loss = 4.7668e-02, PNorm = 68.4672, GNorm = 0.6111, lr_0 = 2.5429e-04
Loss = 5.1647e-02, PNorm = 68.4712, GNorm = 0.5734, lr_0 = 2.5411e-04
Loss = 6.1761e-02, PNorm = 68.4748, GNorm = 0.4725, lr_0 = 2.5394e-04
Loss = 6.0073e-02, PNorm = 68.4793, GNorm = 0.5316, lr_0 = 2.5377e-04
Loss = 5.7526e-02, PNorm = 68.4865, GNorm = 0.8572, lr_0 = 2.5359e-04
Loss = 4.9402e-02, PNorm = 68.4918, GNorm = 0.5131, lr_0 = 2.5342e-04
Loss = 5.0922e-02, PNorm = 68.4956, GNorm = 0.5627, lr_0 = 2.5325e-04
Loss = 5.4032e-02, PNorm = 68.5014, GNorm = 0.8207, lr_0 = 2.5307e-04
Loss = 4.6853e-02, PNorm = 68.5064, GNorm = 0.6296, lr_0 = 2.5290e-04
Loss = 5.2688e-02, PNorm = 68.5124, GNorm = 0.7304, lr_0 = 2.5273e-04
Loss = 6.2986e-02, PNorm = 68.5198, GNorm = 0.6759, lr_0 = 2.5255e-04
Loss = 4.8034e-02, PNorm = 68.5291, GNorm = 0.4988, lr_0 = 2.5238e-04
Loss = 5.1211e-02, PNorm = 68.5345, GNorm = 0.5608, lr_0 = 2.5221e-04
Loss = 5.2411e-02, PNorm = 68.5388, GNorm = 0.4469, lr_0 = 2.5203e-04
Loss = 6.2208e-02, PNorm = 68.5435, GNorm = 0.4559, lr_0 = 2.5186e-04
Loss = 5.7522e-02, PNorm = 68.5471, GNorm = 0.8566, lr_0 = 2.5169e-04
Loss = 5.3622e-02, PNorm = 68.5489, GNorm = 0.6040, lr_0 = 2.5152e-04
Loss = 4.8570e-02, PNorm = 68.5529, GNorm = 0.4326, lr_0 = 2.5134e-04
Loss = 4.5654e-02, PNorm = 68.5552, GNorm = 0.4004, lr_0 = 2.5117e-04
Loss = 5.5896e-02, PNorm = 68.5580, GNorm = 0.5599, lr_0 = 2.5100e-04
Loss = 5.5718e-02, PNorm = 68.5636, GNorm = 0.4691, lr_0 = 2.5083e-04
Loss = 6.2740e-02, PNorm = 68.5674, GNorm = 0.7085, lr_0 = 2.5066e-04
Loss = 5.5130e-02, PNorm = 68.5694, GNorm = 0.5836, lr_0 = 2.5048e-04
Loss = 5.5725e-02, PNorm = 68.5744, GNorm = 0.7504, lr_0 = 2.5031e-04
Loss = 5.4870e-02, PNorm = 68.5784, GNorm = 0.6230, lr_0 = 2.5014e-04
Loss = 4.8675e-02, PNorm = 68.5809, GNorm = 0.6678, lr_0 = 2.4997e-04
Loss = 5.3703e-02, PNorm = 68.5830, GNorm = 0.5133, lr_0 = 2.4980e-04
Loss = 5.2510e-02, PNorm = 68.5871, GNorm = 0.4551, lr_0 = 2.4963e-04
Loss = 6.0472e-02, PNorm = 68.5933, GNorm = 0.4973, lr_0 = 2.4946e-04
Loss = 4.7433e-02, PNorm = 68.5978, GNorm = 0.3854, lr_0 = 2.4929e-04
Loss = 5.7461e-02, PNorm = 68.6011, GNorm = 0.5181, lr_0 = 2.4911e-04
Loss = 5.6449e-02, PNorm = 68.6031, GNorm = 0.5643, lr_0 = 2.4894e-04
Loss = 5.4881e-02, PNorm = 68.6057, GNorm = 0.5694, lr_0 = 2.4877e-04
Loss = 5.5791e-02, PNorm = 68.6097, GNorm = 0.3779, lr_0 = 2.4860e-04
Loss = 6.0399e-02, PNorm = 68.6142, GNorm = 0.4801, lr_0 = 2.4843e-04
Loss = 5.1724e-02, PNorm = 68.6191, GNorm = 0.5735, lr_0 = 2.4826e-04
Loss = 4.9594e-02, PNorm = 68.6248, GNorm = 0.8841, lr_0 = 2.4809e-04
Loss = 4.6956e-02, PNorm = 68.6299, GNorm = 0.4625, lr_0 = 2.4792e-04
Loss = 5.3235e-02, PNorm = 68.6357, GNorm = 0.4450, lr_0 = 2.4775e-04
Loss = 6.0833e-02, PNorm = 68.6389, GNorm = 0.9017, lr_0 = 2.4758e-04
Loss = 5.1832e-02, PNorm = 68.6444, GNorm = 0.4894, lr_0 = 2.4741e-04
Loss = 4.8495e-02, PNorm = 68.6476, GNorm = 0.4990, lr_0 = 2.4724e-04
Loss = 5.3905e-02, PNorm = 68.6497, GNorm = 0.8777, lr_0 = 2.4707e-04
Validation mae = 0.386677
Epoch 19
Loss = 4.1580e-02, PNorm = 68.6543, GNorm = 0.4036, lr_0 = 2.4690e-04
Loss = 4.5222e-02, PNorm = 68.6584, GNorm = 0.5732, lr_0 = 2.4674e-04
Loss = 4.5518e-02, PNorm = 68.6619, GNorm = 0.3883, lr_0 = 2.4657e-04
Loss = 5.3273e-02, PNorm = 68.6686, GNorm = 0.5234, lr_0 = 2.4640e-04
Loss = 4.6766e-02, PNorm = 68.6739, GNorm = 0.4936, lr_0 = 2.4623e-04
Loss = 5.0881e-02, PNorm = 68.6772, GNorm = 0.5132, lr_0 = 2.4606e-04
Loss = 4.3372e-02, PNorm = 68.6814, GNorm = 0.5133, lr_0 = 2.4589e-04
Loss = 5.1857e-02, PNorm = 68.6850, GNorm = 0.6373, lr_0 = 2.4572e-04
Loss = 4.1617e-02, PNorm = 68.6880, GNorm = 0.4801, lr_0 = 2.4556e-04
Loss = 5.4332e-02, PNorm = 68.6931, GNorm = 0.4762, lr_0 = 2.4539e-04
Loss = 5.4410e-02, PNorm = 68.7008, GNorm = 0.4342, lr_0 = 2.4522e-04
Loss = 4.8988e-02, PNorm = 68.7059, GNorm = 0.5457, lr_0 = 2.4505e-04
Loss = 4.7452e-02, PNorm = 68.7101, GNorm = 0.5073, lr_0 = 2.4488e-04
Loss = 4.6565e-02, PNorm = 68.7152, GNorm = 0.5070, lr_0 = 2.4472e-04
Loss = 4.0446e-02, PNorm = 68.7210, GNorm = 0.4584, lr_0 = 2.4455e-04
Loss = 5.9209e-02, PNorm = 68.7253, GNorm = 0.3589, lr_0 = 2.4438e-04
Loss = 5.0266e-02, PNorm = 68.7306, GNorm = 0.4832, lr_0 = 2.4421e-04
Loss = 5.2967e-02, PNorm = 68.7366, GNorm = 0.4765, lr_0 = 2.4405e-04
Loss = 4.4442e-02, PNorm = 68.7421, GNorm = 0.5338, lr_0 = 2.4388e-04
Loss = 4.5672e-02, PNorm = 68.7461, GNorm = 0.5838, lr_0 = 2.4371e-04
Loss = 4.7102e-02, PNorm = 68.7494, GNorm = 0.4316, lr_0 = 2.4354e-04
Loss = 4.7868e-02, PNorm = 68.7537, GNorm = 0.3569, lr_0 = 2.4338e-04
Loss = 4.1146e-02, PNorm = 68.7589, GNorm = 0.5070, lr_0 = 2.4321e-04
Loss = 4.4356e-02, PNorm = 68.7615, GNorm = 0.4095, lr_0 = 2.4304e-04
Loss = 4.9625e-02, PNorm = 68.7653, GNorm = 0.6040, lr_0 = 2.4288e-04
Loss = 4.7222e-02, PNorm = 68.7701, GNorm = 0.6418, lr_0 = 2.4271e-04
Loss = 5.4849e-02, PNorm = 68.7745, GNorm = 0.6221, lr_0 = 2.4254e-04
Loss = 4.7742e-02, PNorm = 68.7786, GNorm = 0.6408, lr_0 = 2.4238e-04
Loss = 5.0002e-02, PNorm = 68.7833, GNorm = 0.4920, lr_0 = 2.4221e-04
Loss = 4.3887e-02, PNorm = 68.7883, GNorm = 0.5059, lr_0 = 2.4205e-04
Loss = 5.2876e-02, PNorm = 68.7926, GNorm = 0.5779, lr_0 = 2.4188e-04
Loss = 4.1186e-02, PNorm = 68.7968, GNorm = 0.5678, lr_0 = 2.4171e-04
Loss = 4.5750e-02, PNorm = 68.8006, GNorm = 0.5157, lr_0 = 2.4155e-04
Loss = 5.4177e-02, PNorm = 68.8047, GNorm = 0.4342, lr_0 = 2.4138e-04
Loss = 4.6016e-02, PNorm = 68.8087, GNorm = 0.4958, lr_0 = 2.4122e-04
Loss = 4.6847e-02, PNorm = 68.8141, GNorm = 0.5122, lr_0 = 2.4105e-04
Loss = 4.8600e-02, PNorm = 68.8187, GNorm = 0.5160, lr_0 = 2.4089e-04
Loss = 5.3282e-02, PNorm = 68.8226, GNorm = 0.5366, lr_0 = 2.4072e-04
Loss = 4.1184e-02, PNorm = 68.8263, GNorm = 0.4726, lr_0 = 2.4056e-04
Loss = 4.6809e-02, PNorm = 68.8318, GNorm = 0.4715, lr_0 = 2.4039e-04
Loss = 5.8278e-02, PNorm = 68.8380, GNorm = 0.8277, lr_0 = 2.4023e-04
Loss = 6.0678e-02, PNorm = 68.8441, GNorm = 0.6432, lr_0 = 2.4006e-04
Loss = 4.7019e-02, PNorm = 68.8509, GNorm = 0.4935, lr_0 = 2.3990e-04
Loss = 4.9078e-02, PNorm = 68.8555, GNorm = 0.5791, lr_0 = 2.3974e-04
Loss = 5.0522e-02, PNorm = 68.8591, GNorm = 0.4307, lr_0 = 2.3957e-04
Loss = 4.3952e-02, PNorm = 68.8641, GNorm = 0.4583, lr_0 = 2.3941e-04
Loss = 4.5960e-02, PNorm = 68.8687, GNorm = 0.4407, lr_0 = 2.3924e-04
Loss = 4.4312e-02, PNorm = 68.8726, GNorm = 0.6725, lr_0 = 2.3908e-04
Loss = 4.9167e-02, PNorm = 68.8766, GNorm = 0.5701, lr_0 = 2.3892e-04
Loss = 5.7549e-02, PNorm = 68.8797, GNorm = 0.3686, lr_0 = 2.3875e-04
Loss = 5.5407e-02, PNorm = 68.8825, GNorm = 0.6070, lr_0 = 2.3859e-04
Loss = 5.8557e-02, PNorm = 68.8868, GNorm = 0.7584, lr_0 = 2.3842e-04
Loss = 4.7156e-02, PNorm = 68.8907, GNorm = 0.4191, lr_0 = 2.3826e-04
Loss = 4.3132e-02, PNorm = 68.8929, GNorm = 0.5703, lr_0 = 2.3810e-04
Loss = 4.5922e-02, PNorm = 68.8958, GNorm = 0.4462, lr_0 = 2.3794e-04
Loss = 5.0830e-02, PNorm = 68.8991, GNorm = 0.3803, lr_0 = 2.3777e-04
Loss = 5.2081e-02, PNorm = 68.9024, GNorm = 0.6299, lr_0 = 2.3761e-04
Loss = 4.6744e-02, PNorm = 68.9064, GNorm = 0.6259, lr_0 = 2.3745e-04
Loss = 5.0249e-02, PNorm = 68.9091, GNorm = 0.4747, lr_0 = 2.3728e-04
Loss = 4.9888e-02, PNorm = 68.9135, GNorm = 0.3748, lr_0 = 2.3712e-04
Loss = 5.6411e-02, PNorm = 68.9175, GNorm = 0.6337, lr_0 = 2.3696e-04
Loss = 5.1630e-02, PNorm = 68.9211, GNorm = 0.4812, lr_0 = 2.3680e-04
Loss = 5.0684e-02, PNorm = 68.9262, GNorm = 0.6595, lr_0 = 2.3663e-04
Loss = 4.1306e-02, PNorm = 68.9304, GNorm = 0.4368, lr_0 = 2.3647e-04
Loss = 4.6066e-02, PNorm = 68.9353, GNorm = 0.6293, lr_0 = 2.3631e-04
Loss = 5.1172e-02, PNorm = 68.9410, GNorm = 0.6197, lr_0 = 2.3615e-04
Loss = 5.1653e-02, PNorm = 68.9449, GNorm = 0.6656, lr_0 = 2.3599e-04
Loss = 4.9908e-02, PNorm = 68.9491, GNorm = 0.5739, lr_0 = 2.3582e-04
Loss = 4.9811e-02, PNorm = 68.9556, GNorm = 0.5450, lr_0 = 2.3566e-04
Loss = 4.3157e-02, PNorm = 68.9595, GNorm = 0.6529, lr_0 = 2.3550e-04
Loss = 5.1695e-02, PNorm = 68.9624, GNorm = 0.5499, lr_0 = 2.3534e-04
Loss = 5.3516e-02, PNorm = 68.9680, GNorm = 0.6319, lr_0 = 2.3518e-04
Loss = 5.6675e-02, PNorm = 68.9725, GNorm = 0.5734, lr_0 = 2.3502e-04
Loss = 5.0926e-02, PNorm = 68.9765, GNorm = 0.4507, lr_0 = 2.3486e-04
Loss = 4.3082e-02, PNorm = 68.9783, GNorm = 0.4985, lr_0 = 2.3470e-04
Loss = 5.1282e-02, PNorm = 68.9785, GNorm = 0.5211, lr_0 = 2.3454e-04
Loss = 5.3963e-02, PNorm = 68.9810, GNorm = 0.6497, lr_0 = 2.3437e-04
Loss = 4.8362e-02, PNorm = 68.9846, GNorm = 0.4744, lr_0 = 2.3421e-04
Loss = 6.0463e-02, PNorm = 68.9881, GNorm = 0.7334, lr_0 = 2.3405e-04
Loss = 5.0668e-02, PNorm = 68.9915, GNorm = 0.4041, lr_0 = 2.3389e-04
Loss = 5.4086e-02, PNorm = 68.9948, GNorm = 0.6627, lr_0 = 2.3373e-04
Loss = 5.0891e-02, PNorm = 68.9985, GNorm = 0.6770, lr_0 = 2.3357e-04
Loss = 4.2370e-02, PNorm = 69.0010, GNorm = 0.6550, lr_0 = 2.3341e-04
Loss = 5.8407e-02, PNorm = 69.0042, GNorm = 0.5987, lr_0 = 2.3325e-04
Loss = 4.8359e-02, PNorm = 69.0093, GNorm = 0.7416, lr_0 = 2.3309e-04
Loss = 5.3963e-02, PNorm = 69.0138, GNorm = 0.7664, lr_0 = 2.3293e-04
Loss = 4.6632e-02, PNorm = 69.0171, GNorm = 0.5947, lr_0 = 2.3277e-04
Loss = 4.5876e-02, PNorm = 69.0216, GNorm = 0.5226, lr_0 = 2.3261e-04
Loss = 4.6794e-02, PNorm = 69.0254, GNorm = 0.4270, lr_0 = 2.3246e-04
Loss = 4.7323e-02, PNorm = 69.0295, GNorm = 0.5813, lr_0 = 2.3230e-04
Loss = 5.0545e-02, PNorm = 69.0331, GNorm = 0.4419, lr_0 = 2.3214e-04
Loss = 5.0331e-02, PNorm = 69.0371, GNorm = 0.5327, lr_0 = 2.3198e-04
Loss = 5.6827e-02, PNorm = 69.0400, GNorm = 0.6810, lr_0 = 2.3182e-04
Loss = 5.4643e-02, PNorm = 69.0420, GNorm = 0.7406, lr_0 = 2.3166e-04
Loss = 6.4488e-02, PNorm = 69.0449, GNorm = 0.5589, lr_0 = 2.3150e-04
Loss = 5.1940e-02, PNorm = 69.0482, GNorm = 0.8464, lr_0 = 2.3134e-04
Loss = 4.5702e-02, PNorm = 69.0521, GNorm = 0.6758, lr_0 = 2.3118e-04
Loss = 4.6524e-02, PNorm = 69.0549, GNorm = 0.4529, lr_0 = 2.3103e-04
Loss = 4.2334e-02, PNorm = 69.0573, GNorm = 0.4207, lr_0 = 2.3087e-04
Loss = 4.9082e-02, PNorm = 69.0602, GNorm = 0.5706, lr_0 = 2.3071e-04
Loss = 4.9512e-02, PNorm = 69.0638, GNorm = 0.6065, lr_0 = 2.3055e-04
Loss = 5.1788e-02, PNorm = 69.0681, GNorm = 0.5099, lr_0 = 2.3039e-04
Loss = 4.6779e-02, PNorm = 69.0726, GNorm = 0.3801, lr_0 = 2.3024e-04
Loss = 4.9959e-02, PNorm = 69.0773, GNorm = 0.4776, lr_0 = 2.3008e-04
Loss = 4.6761e-02, PNorm = 69.0825, GNorm = 0.4822, lr_0 = 2.2992e-04
Loss = 4.7145e-02, PNorm = 69.0860, GNorm = 0.5327, lr_0 = 2.2976e-04
Loss = 4.8267e-02, PNorm = 69.0888, GNorm = 0.5583, lr_0 = 2.2961e-04
Loss = 6.4168e-02, PNorm = 69.0925, GNorm = 0.5244, lr_0 = 2.2945e-04
Loss = 4.7981e-02, PNorm = 69.0957, GNorm = 0.8222, lr_0 = 2.2929e-04
Loss = 5.2158e-02, PNorm = 69.0999, GNorm = 0.5635, lr_0 = 2.2913e-04
Loss = 4.8075e-02, PNorm = 69.1041, GNorm = 0.4623, lr_0 = 2.2898e-04
Loss = 4.7802e-02, PNorm = 69.1083, GNorm = 0.5345, lr_0 = 2.2882e-04
Loss = 5.1683e-02, PNorm = 69.1118, GNorm = 0.4164, lr_0 = 2.2866e-04
Loss = 4.6744e-02, PNorm = 69.1158, GNorm = 0.6489, lr_0 = 2.2851e-04
Loss = 4.7746e-02, PNorm = 69.1186, GNorm = 0.7106, lr_0 = 2.2835e-04
Loss = 4.9332e-02, PNorm = 69.1204, GNorm = 0.4587, lr_0 = 2.2819e-04
Loss = 4.6601e-02, PNorm = 69.1239, GNorm = 0.5358, lr_0 = 2.2804e-04
Loss = 6.0652e-02, PNorm = 69.1289, GNorm = 0.6239, lr_0 = 2.2788e-04
Loss = 5.1669e-02, PNorm = 69.1326, GNorm = 0.4811, lr_0 = 2.2773e-04
Loss = 5.6281e-02, PNorm = 69.1362, GNorm = 0.4750, lr_0 = 2.2757e-04
Validation mae = 0.389494
Epoch 20
Loss = 3.9295e-02, PNorm = 69.1390, GNorm = 0.5037, lr_0 = 2.2741e-04
Loss = 3.9305e-02, PNorm = 69.1430, GNorm = 0.4637, lr_0 = 2.2726e-04
Loss = 4.8816e-02, PNorm = 69.1484, GNorm = 0.5527, lr_0 = 2.2710e-04
Loss = 4.2310e-02, PNorm = 69.1536, GNorm = 0.4437, lr_0 = 2.2695e-04
Loss = 4.1103e-02, PNorm = 69.1576, GNorm = 0.5942, lr_0 = 2.2679e-04
Loss = 4.3287e-02, PNorm = 69.1616, GNorm = 0.5334, lr_0 = 2.2664e-04
Loss = 3.9243e-02, PNorm = 69.1655, GNorm = 0.7982, lr_0 = 2.2648e-04
Loss = 3.8831e-02, PNorm = 69.1691, GNorm = 0.6599, lr_0 = 2.2632e-04
Loss = 4.3401e-02, PNorm = 69.1719, GNorm = 0.4029, lr_0 = 2.2617e-04
Loss = 4.4218e-02, PNorm = 69.1755, GNorm = 0.4693, lr_0 = 2.2601e-04
Loss = 5.0480e-02, PNorm = 69.1808, GNorm = 0.3734, lr_0 = 2.2586e-04
Loss = 4.9005e-02, PNorm = 69.1853, GNorm = 0.8143, lr_0 = 2.2571e-04
Loss = 4.9425e-02, PNorm = 69.1902, GNorm = 0.4823, lr_0 = 2.2555e-04
Loss = 4.1111e-02, PNorm = 69.1956, GNorm = 0.6919, lr_0 = 2.2540e-04
Loss = 4.1283e-02, PNorm = 69.2010, GNorm = 0.4370, lr_0 = 2.2524e-04
Loss = 3.8230e-02, PNorm = 69.2040, GNorm = 0.3837, lr_0 = 2.2509e-04
Loss = 4.5805e-02, PNorm = 69.2062, GNorm = 0.5114, lr_0 = 2.2493e-04
Loss = 3.9447e-02, PNorm = 69.2088, GNorm = 0.3910, lr_0 = 2.2478e-04
Loss = 4.4341e-02, PNorm = 69.2119, GNorm = 0.7778, lr_0 = 2.2463e-04
Loss = 3.8936e-02, PNorm = 69.2144, GNorm = 0.4212, lr_0 = 2.2447e-04
Loss = 3.9551e-02, PNorm = 69.2172, GNorm = 0.5944, lr_0 = 2.2432e-04
Loss = 4.0557e-02, PNorm = 69.2210, GNorm = 0.3974, lr_0 = 2.2416e-04
Loss = 4.4856e-02, PNorm = 69.2247, GNorm = 0.5396, lr_0 = 2.2401e-04
Loss = 5.1310e-02, PNorm = 69.2282, GNorm = 0.7277, lr_0 = 2.2386e-04
Loss = 3.9488e-02, PNorm = 69.2328, GNorm = 0.4157, lr_0 = 2.2370e-04
Loss = 4.4812e-02, PNorm = 69.2362, GNorm = 0.5965, lr_0 = 2.2355e-04
Loss = 4.3557e-02, PNorm = 69.2408, GNorm = 0.5089, lr_0 = 2.2340e-04
Loss = 4.2463e-02, PNorm = 69.2443, GNorm = 0.7167, lr_0 = 2.2324e-04
Loss = 4.0026e-02, PNorm = 69.2473, GNorm = 0.4618, lr_0 = 2.2309e-04
Loss = 4.9369e-02, PNorm = 69.2521, GNorm = 0.5727, lr_0 = 2.2294e-04
Loss = 4.6141e-02, PNorm = 69.2575, GNorm = 0.5017, lr_0 = 2.2279e-04
Loss = 4.3474e-02, PNorm = 69.2616, GNorm = 0.6375, lr_0 = 2.2263e-04
Loss = 4.5438e-02, PNorm = 69.2660, GNorm = 0.3684, lr_0 = 2.2248e-04
Loss = 3.8692e-02, PNorm = 69.2683, GNorm = 0.5452, lr_0 = 2.2233e-04
Loss = 4.3308e-02, PNorm = 69.2714, GNorm = 0.6372, lr_0 = 2.2218e-04
Loss = 4.5832e-02, PNorm = 69.2762, GNorm = 0.6958, lr_0 = 2.2202e-04
Loss = 4.3347e-02, PNorm = 69.2801, GNorm = 0.4850, lr_0 = 2.2187e-04
Loss = 4.8064e-02, PNorm = 69.2835, GNorm = 0.5089, lr_0 = 2.2172e-04
Loss = 4.3324e-02, PNorm = 69.2878, GNorm = 0.4971, lr_0 = 2.2157e-04
Loss = 4.7113e-02, PNorm = 69.2924, GNorm = 0.8693, lr_0 = 2.2142e-04
Loss = 4.3992e-02, PNorm = 69.2964, GNorm = 0.4974, lr_0 = 2.2126e-04
Loss = 4.9951e-02, PNorm = 69.3000, GNorm = 0.5865, lr_0 = 2.2111e-04
Loss = 4.6094e-02, PNorm = 69.3036, GNorm = 0.5817, lr_0 = 2.2096e-04
Loss = 4.1278e-02, PNorm = 69.3070, GNorm = 0.6524, lr_0 = 2.2081e-04
Loss = 4.4391e-02, PNorm = 69.3088, GNorm = 0.5540, lr_0 = 2.2066e-04
Loss = 4.4598e-02, PNorm = 69.3116, GNorm = 0.6117, lr_0 = 2.2051e-04
Loss = 4.5487e-02, PNorm = 69.3158, GNorm = 0.5288, lr_0 = 2.2036e-04
Loss = 4.8691e-02, PNorm = 69.3209, GNorm = 0.5640, lr_0 = 2.2021e-04
Loss = 5.3705e-02, PNorm = 69.3237, GNorm = 0.6726, lr_0 = 2.2005e-04
Loss = 4.4650e-02, PNorm = 69.3266, GNorm = 0.6659, lr_0 = 2.1990e-04
Loss = 5.1613e-02, PNorm = 69.3308, GNorm = 0.8313, lr_0 = 2.1975e-04
Loss = 4.9239e-02, PNorm = 69.3355, GNorm = 0.6381, lr_0 = 2.1960e-04
Loss = 4.4932e-02, PNorm = 69.3406, GNorm = 0.4727, lr_0 = 2.1945e-04
Loss = 4.6259e-02, PNorm = 69.3454, GNorm = 0.6052, lr_0 = 2.1930e-04
Loss = 4.6135e-02, PNorm = 69.3489, GNorm = 0.6585, lr_0 = 2.1915e-04
Loss = 4.7951e-02, PNorm = 69.3522, GNorm = 0.5499, lr_0 = 2.1900e-04
Loss = 4.7806e-02, PNorm = 69.3541, GNorm = 0.4092, lr_0 = 2.1885e-04
Loss = 4.3284e-02, PNorm = 69.3568, GNorm = 0.6124, lr_0 = 2.1870e-04
Loss = 4.3603e-02, PNorm = 69.3604, GNorm = 0.7535, lr_0 = 2.1855e-04
Loss = 5.0531e-02, PNorm = 69.3639, GNorm = 0.6384, lr_0 = 2.1840e-04
Loss = 4.2811e-02, PNorm = 69.3684, GNorm = 0.6413, lr_0 = 2.1825e-04
Loss = 4.6514e-02, PNorm = 69.3715, GNorm = 0.3909, lr_0 = 2.1810e-04
Loss = 4.9784e-02, PNorm = 69.3749, GNorm = 0.5069, lr_0 = 2.1795e-04
Loss = 5.0038e-02, PNorm = 69.3770, GNorm = 0.3979, lr_0 = 2.1780e-04
Loss = 5.5996e-02, PNorm = 69.3809, GNorm = 0.5961, lr_0 = 2.1765e-04
Loss = 4.5802e-02, PNorm = 69.3863, GNorm = 0.4988, lr_0 = 2.1751e-04
Loss = 4.9532e-02, PNorm = 69.3886, GNorm = 0.4759, lr_0 = 2.1736e-04
Loss = 5.7269e-02, PNorm = 69.3926, GNorm = 0.4755, lr_0 = 2.1721e-04
Loss = 4.5426e-02, PNorm = 69.3964, GNorm = 0.4876, lr_0 = 2.1706e-04
Loss = 4.8665e-02, PNorm = 69.3993, GNorm = 0.5515, lr_0 = 2.1691e-04
Loss = 5.5113e-02, PNorm = 69.4030, GNorm = 0.6508, lr_0 = 2.1676e-04
Loss = 4.4705e-02, PNorm = 69.4068, GNorm = 0.4461, lr_0 = 2.1661e-04
Loss = 4.5774e-02, PNorm = 69.4095, GNorm = 0.5160, lr_0 = 2.1646e-04
Loss = 5.0187e-02, PNorm = 69.4127, GNorm = 0.4363, lr_0 = 2.1632e-04
Loss = 4.2077e-02, PNorm = 69.4166, GNorm = 0.5288, lr_0 = 2.1617e-04
Loss = 4.7129e-02, PNorm = 69.4205, GNorm = 0.4251, lr_0 = 2.1602e-04
Loss = 5.4892e-02, PNorm = 69.4225, GNorm = 0.6089, lr_0 = 2.1587e-04
Loss = 5.6087e-02, PNorm = 69.4252, GNorm = 0.5224, lr_0 = 2.1572e-04
Loss = 4.8327e-02, PNorm = 69.4307, GNorm = 0.7780, lr_0 = 2.1558e-04
Loss = 4.4961e-02, PNorm = 69.4352, GNorm = 0.6825, lr_0 = 2.1543e-04
Loss = 4.3156e-02, PNorm = 69.4384, GNorm = 0.5303, lr_0 = 2.1528e-04
Loss = 5.1548e-02, PNorm = 69.4402, GNorm = 0.5901, lr_0 = 2.1513e-04
Loss = 4.6682e-02, PNorm = 69.4436, GNorm = 0.5648, lr_0 = 2.1499e-04
Loss = 4.0359e-02, PNorm = 69.4482, GNorm = 0.3544, lr_0 = 2.1484e-04
Loss = 4.7947e-02, PNorm = 69.4518, GNorm = 0.5095, lr_0 = 2.1469e-04
Loss = 5.2268e-02, PNorm = 69.4543, GNorm = 0.3837, lr_0 = 2.1454e-04
Loss = 5.4306e-02, PNorm = 69.4571, GNorm = 0.5352, lr_0 = 2.1440e-04
Loss = 6.3293e-02, PNorm = 69.4599, GNorm = 0.6526, lr_0 = 2.1425e-04
Loss = 5.3114e-02, PNorm = 69.4633, GNorm = 0.5125, lr_0 = 2.1410e-04
Loss = 4.6862e-02, PNorm = 69.4681, GNorm = 0.4641, lr_0 = 2.1396e-04
Loss = 4.6214e-02, PNorm = 69.4735, GNorm = 0.6549, lr_0 = 2.1381e-04
Loss = 3.8002e-02, PNorm = 69.4776, GNorm = 0.4346, lr_0 = 2.1366e-04
Loss = 5.1316e-02, PNorm = 69.4817, GNorm = 0.7072, lr_0 = 2.1352e-04
Loss = 5.7062e-02, PNorm = 69.4864, GNorm = 0.6496, lr_0 = 2.1337e-04
Loss = 4.6846e-02, PNorm = 69.4891, GNorm = 0.4744, lr_0 = 2.1323e-04
Loss = 5.7591e-02, PNorm = 69.4939, GNorm = 0.7518, lr_0 = 2.1308e-04
Loss = 4.1999e-02, PNorm = 69.4977, GNorm = 0.5037, lr_0 = 2.1293e-04
Loss = 4.7022e-02, PNorm = 69.5017, GNorm = 0.6400, lr_0 = 2.1279e-04
Loss = 4.9081e-02, PNorm = 69.5053, GNorm = 0.6266, lr_0 = 2.1264e-04
Loss = 6.1218e-02, PNorm = 69.5069, GNorm = 0.6214, lr_0 = 2.1250e-04
Loss = 5.0252e-02, PNorm = 69.5117, GNorm = 0.5802, lr_0 = 2.1235e-04
Loss = 4.6711e-02, PNorm = 69.5165, GNorm = 0.5530, lr_0 = 2.1221e-04
Loss = 4.7333e-02, PNorm = 69.5202, GNorm = 0.5605, lr_0 = 2.1206e-04
Loss = 4.7244e-02, PNorm = 69.5242, GNorm = 0.5356, lr_0 = 2.1191e-04
Loss = 4.7803e-02, PNorm = 69.5267, GNorm = 0.4139, lr_0 = 2.1177e-04
Loss = 4.9929e-02, PNorm = 69.5289, GNorm = 0.5819, lr_0 = 2.1162e-04
Loss = 4.5161e-02, PNorm = 69.5326, GNorm = 0.5776, lr_0 = 2.1148e-04
Loss = 4.5529e-02, PNorm = 69.5382, GNorm = 0.5806, lr_0 = 2.1133e-04
Loss = 5.6115e-02, PNorm = 69.5408, GNorm = 0.6101, lr_0 = 2.1119e-04
Loss = 4.5834e-02, PNorm = 69.5423, GNorm = 0.6955, lr_0 = 2.1104e-04
Loss = 4.4153e-02, PNorm = 69.5464, GNorm = 0.4301, lr_0 = 2.1090e-04
Loss = 4.7403e-02, PNorm = 69.5512, GNorm = 0.4170, lr_0 = 2.1076e-04
Loss = 4.7445e-02, PNorm = 69.5535, GNorm = 0.5496, lr_0 = 2.1061e-04
Loss = 4.1053e-02, PNorm = 69.5559, GNorm = 0.3757, lr_0 = 2.1047e-04
Loss = 4.9477e-02, PNorm = 69.5586, GNorm = 0.5241, lr_0 = 2.1032e-04
Loss = 4.8019e-02, PNorm = 69.5615, GNorm = 0.4424, lr_0 = 2.1018e-04
Loss = 5.6602e-02, PNorm = 69.5657, GNorm = 0.5853, lr_0 = 2.1003e-04
Loss = 5.4385e-02, PNorm = 69.5724, GNorm = 0.5853, lr_0 = 2.0989e-04
Loss = 4.6548e-02, PNorm = 69.5765, GNorm = 0.4742, lr_0 = 2.0975e-04
Loss = 4.7034e-02, PNorm = 69.5786, GNorm = 0.5341, lr_0 = 2.0960e-04
Validation mae = 0.390999
Epoch 21
Loss = 4.5090e-02, PNorm = 69.5821, GNorm = 0.4271, lr_0 = 2.0946e-04
Loss = 3.9786e-02, PNorm = 69.5836, GNorm = 0.5009, lr_0 = 2.0932e-04
Loss = 3.9882e-02, PNorm = 69.5870, GNorm = 0.4153, lr_0 = 2.0917e-04
Loss = 4.1821e-02, PNorm = 69.5914, GNorm = 0.5807, lr_0 = 2.0903e-04
Loss = 3.4746e-02, PNorm = 69.5963, GNorm = 0.4923, lr_0 = 2.0889e-04
Loss = 4.1488e-02, PNorm = 69.6000, GNorm = 0.3945, lr_0 = 2.0874e-04
Loss = 4.4254e-02, PNorm = 69.6029, GNorm = 0.3657, lr_0 = 2.0860e-04
Loss = 3.7237e-02, PNorm = 69.6050, GNorm = 0.3985, lr_0 = 2.0846e-04
Loss = 4.2555e-02, PNorm = 69.6079, GNorm = 0.3742, lr_0 = 2.0831e-04
Loss = 4.1771e-02, PNorm = 69.6120, GNorm = 0.5575, lr_0 = 2.0817e-04
Loss = 4.4171e-02, PNorm = 69.6166, GNorm = 0.4224, lr_0 = 2.0803e-04
Loss = 4.7478e-02, PNorm = 69.6191, GNorm = 0.5024, lr_0 = 2.0789e-04
Loss = 3.6692e-02, PNorm = 69.6198, GNorm = 0.5439, lr_0 = 2.0774e-04
Loss = 3.6958e-02, PNorm = 69.6236, GNorm = 0.4294, lr_0 = 2.0760e-04
Loss = 4.1892e-02, PNorm = 69.6290, GNorm = 0.4715, lr_0 = 2.0746e-04
Loss = 4.2688e-02, PNorm = 69.6338, GNorm = 0.4265, lr_0 = 2.0732e-04
Loss = 4.1187e-02, PNorm = 69.6369, GNorm = 0.4252, lr_0 = 2.0718e-04
Loss = 3.8608e-02, PNorm = 69.6394, GNorm = 0.4411, lr_0 = 2.0703e-04
Loss = 4.6472e-02, PNorm = 69.6437, GNorm = 0.4742, lr_0 = 2.0689e-04
Loss = 4.3537e-02, PNorm = 69.6465, GNorm = 0.4030, lr_0 = 2.0675e-04
Loss = 3.4403e-02, PNorm = 69.6497, GNorm = 0.4194, lr_0 = 2.0661e-04
Loss = 4.1995e-02, PNorm = 69.6527, GNorm = 0.4534, lr_0 = 2.0647e-04
Loss = 4.4226e-02, PNorm = 69.6558, GNorm = 0.5744, lr_0 = 2.0633e-04
Loss = 5.0466e-02, PNorm = 69.6586, GNorm = 0.6763, lr_0 = 2.0618e-04
Loss = 4.9091e-02, PNorm = 69.6603, GNorm = 0.6360, lr_0 = 2.0604e-04
Loss = 4.3281e-02, PNorm = 69.6642, GNorm = 0.6747, lr_0 = 2.0590e-04
Loss = 3.9429e-02, PNorm = 69.6671, GNorm = 0.5922, lr_0 = 2.0576e-04
Loss = 4.5317e-02, PNorm = 69.6708, GNorm = 0.7780, lr_0 = 2.0562e-04
Loss = 4.7564e-02, PNorm = 69.6744, GNorm = 0.8793, lr_0 = 2.0548e-04
Loss = 4.3461e-02, PNorm = 69.6790, GNorm = 0.4861, lr_0 = 2.0534e-04
Loss = 4.3715e-02, PNorm = 69.6823, GNorm = 0.7377, lr_0 = 2.0520e-04
Loss = 4.2993e-02, PNorm = 69.6843, GNorm = 0.3252, lr_0 = 2.0506e-04
Loss = 4.1732e-02, PNorm = 69.6867, GNorm = 0.4558, lr_0 = 2.0492e-04
Loss = 3.9062e-02, PNorm = 69.6895, GNorm = 0.4821, lr_0 = 2.0478e-04
Loss = 4.6639e-02, PNorm = 69.6924, GNorm = 0.4566, lr_0 = 2.0464e-04
Loss = 4.0759e-02, PNorm = 69.6937, GNorm = 0.3878, lr_0 = 2.0450e-04
Loss = 4.2437e-02, PNorm = 69.6961, GNorm = 0.3732, lr_0 = 2.0436e-04
Loss = 3.6159e-02, PNorm = 69.6993, GNorm = 0.5182, lr_0 = 2.0422e-04
Loss = 5.3374e-02, PNorm = 69.7032, GNorm = 0.4439, lr_0 = 2.0408e-04
Loss = 4.2735e-02, PNorm = 69.7070, GNorm = 0.4925, lr_0 = 2.0394e-04
Loss = 4.1209e-02, PNorm = 69.7109, GNorm = 0.5153, lr_0 = 2.0380e-04
Loss = 4.7859e-02, PNorm = 69.7156, GNorm = 0.6286, lr_0 = 2.0366e-04
Loss = 4.6054e-02, PNorm = 69.7191, GNorm = 0.6157, lr_0 = 2.0352e-04
Loss = 4.3213e-02, PNorm = 69.7221, GNorm = 0.5799, lr_0 = 2.0338e-04
Loss = 4.9474e-02, PNorm = 69.7243, GNorm = 0.7950, lr_0 = 2.0324e-04
Loss = 4.9542e-02, PNorm = 69.7288, GNorm = 0.4377, lr_0 = 2.0310e-04
Loss = 4.0686e-02, PNorm = 69.7323, GNorm = 0.5560, lr_0 = 2.0296e-04
Loss = 4.6772e-02, PNorm = 69.7368, GNorm = 0.4585, lr_0 = 2.0282e-04
Loss = 4.2968e-02, PNorm = 69.7414, GNorm = 0.4390, lr_0 = 2.0268e-04
Loss = 3.7657e-02, PNorm = 69.7458, GNorm = 0.6235, lr_0 = 2.0254e-04
Loss = 5.0665e-02, PNorm = 69.7499, GNorm = 0.5906, lr_0 = 2.0240e-04
Loss = 4.4795e-02, PNorm = 69.7533, GNorm = 0.7183, lr_0 = 2.0227e-04
Loss = 4.3220e-02, PNorm = 69.7557, GNorm = 0.4663, lr_0 = 2.0213e-04
Loss = 4.3520e-02, PNorm = 69.7579, GNorm = 0.7380, lr_0 = 2.0199e-04
Loss = 4.2772e-02, PNorm = 69.7631, GNorm = 0.5935, lr_0 = 2.0185e-04
Loss = 4.6306e-02, PNorm = 69.7675, GNorm = 0.4382, lr_0 = 2.0171e-04
Loss = 4.8990e-02, PNorm = 69.7707, GNorm = 0.5089, lr_0 = 2.0157e-04
Loss = 4.8041e-02, PNorm = 69.7751, GNorm = 0.5249, lr_0 = 2.0144e-04
Loss = 4.4538e-02, PNorm = 69.7783, GNorm = 0.7018, lr_0 = 2.0130e-04
Loss = 4.6420e-02, PNorm = 69.7799, GNorm = 0.5900, lr_0 = 2.0116e-04
Loss = 3.6321e-02, PNorm = 69.7810, GNorm = 0.3984, lr_0 = 2.0102e-04
Loss = 4.6965e-02, PNorm = 69.7842, GNorm = 0.5183, lr_0 = 2.0088e-04
Loss = 3.7439e-02, PNorm = 69.7872, GNorm = 0.4344, lr_0 = 2.0075e-04
Loss = 3.9254e-02, PNorm = 69.7896, GNorm = 0.3720, lr_0 = 2.0061e-04
Loss = 3.7488e-02, PNorm = 69.7920, GNorm = 0.4435, lr_0 = 2.0047e-04
Loss = 3.7449e-02, PNorm = 69.7951, GNorm = 0.7523, lr_0 = 2.0033e-04
Loss = 4.8350e-02, PNorm = 69.7965, GNorm = 0.6767, lr_0 = 2.0020e-04
Loss = 4.3258e-02, PNorm = 69.7982, GNorm = 0.5577, lr_0 = 2.0006e-04
Loss = 4.0973e-02, PNorm = 69.8009, GNorm = 0.4697, lr_0 = 1.9992e-04
Loss = 5.1434e-02, PNorm = 69.8033, GNorm = 0.4681, lr_0 = 1.9979e-04
Loss = 3.9634e-02, PNorm = 69.8043, GNorm = 0.7552, lr_0 = 1.9965e-04
Loss = 4.1022e-02, PNorm = 69.8070, GNorm = 0.4608, lr_0 = 1.9951e-04
Loss = 4.8324e-02, PNorm = 69.8112, GNorm = 0.5179, lr_0 = 1.9938e-04
Loss = 4.7613e-02, PNorm = 69.8140, GNorm = 0.4246, lr_0 = 1.9924e-04
Loss = 4.3519e-02, PNorm = 69.8157, GNorm = 0.5530, lr_0 = 1.9910e-04
Loss = 4.4144e-02, PNorm = 69.8185, GNorm = 0.4014, lr_0 = 1.9897e-04
Loss = 3.9657e-02, PNorm = 69.8205, GNorm = 0.5760, lr_0 = 1.9883e-04
Loss = 4.5337e-02, PNorm = 69.8227, GNorm = 0.5199, lr_0 = 1.9869e-04
Loss = 5.2371e-02, PNorm = 69.8254, GNorm = 1.2318, lr_0 = 1.9856e-04
Loss = 4.5628e-02, PNorm = 69.8294, GNorm = 0.7492, lr_0 = 1.9842e-04
Loss = 4.7961e-02, PNorm = 69.8340, GNorm = 0.7528, lr_0 = 1.9829e-04
Loss = 3.9657e-02, PNorm = 69.8381, GNorm = 0.8300, lr_0 = 1.9815e-04
Loss = 3.9539e-02, PNorm = 69.8415, GNorm = 0.5552, lr_0 = 1.9801e-04
Loss = 4.4113e-02, PNorm = 69.8445, GNorm = 0.4602, lr_0 = 1.9788e-04
Loss = 5.3067e-02, PNorm = 69.8478, GNorm = 0.5921, lr_0 = 1.9774e-04
Loss = 4.0498e-02, PNorm = 69.8529, GNorm = 0.4598, lr_0 = 1.9761e-04
Loss = 4.4286e-02, PNorm = 69.8558, GNorm = 0.4241, lr_0 = 1.9747e-04
Loss = 4.8152e-02, PNorm = 69.8585, GNorm = 0.5191, lr_0 = 1.9734e-04
Loss = 4.6999e-02, PNorm = 69.8620, GNorm = 0.4813, lr_0 = 1.9720e-04
Loss = 4.3933e-02, PNorm = 69.8674, GNorm = 0.3928, lr_0 = 1.9707e-04
Loss = 4.6026e-02, PNorm = 69.8715, GNorm = 0.4049, lr_0 = 1.9693e-04
Loss = 4.3708e-02, PNorm = 69.8734, GNorm = 0.5304, lr_0 = 1.9680e-04
Loss = 4.0434e-02, PNorm = 69.8757, GNorm = 0.5242, lr_0 = 1.9666e-04
Loss = 4.5212e-02, PNorm = 69.8796, GNorm = 0.5521, lr_0 = 1.9653e-04
Loss = 3.9972e-02, PNorm = 69.8824, GNorm = 0.4300, lr_0 = 1.9639e-04
Loss = 3.9507e-02, PNorm = 69.8840, GNorm = 0.5735, lr_0 = 1.9626e-04
Loss = 4.3995e-02, PNorm = 69.8872, GNorm = 0.5249, lr_0 = 1.9612e-04
Loss = 4.4028e-02, PNorm = 69.8902, GNorm = 0.6815, lr_0 = 1.9599e-04
Loss = 5.3545e-02, PNorm = 69.8941, GNorm = 0.5345, lr_0 = 1.9585e-04
Loss = 4.5502e-02, PNorm = 69.8981, GNorm = 0.5045, lr_0 = 1.9572e-04
Loss = 4.7972e-02, PNorm = 69.9006, GNorm = 0.8293, lr_0 = 1.9559e-04
Loss = 5.2238e-02, PNorm = 69.9030, GNorm = 1.1244, lr_0 = 1.9545e-04
Loss = 4.7655e-02, PNorm = 69.9052, GNorm = 0.5283, lr_0 = 1.9532e-04
Loss = 4.6407e-02, PNorm = 69.9077, GNorm = 0.4148, lr_0 = 1.9518e-04
Loss = 4.9479e-02, PNorm = 69.9094, GNorm = 0.6089, lr_0 = 1.9505e-04
Loss = 5.1568e-02, PNorm = 69.9122, GNorm = 0.4322, lr_0 = 1.9492e-04
Loss = 4.7823e-02, PNorm = 69.9172, GNorm = 0.5472, lr_0 = 1.9478e-04
Loss = 4.6837e-02, PNorm = 69.9215, GNorm = 0.5117, lr_0 = 1.9465e-04
Loss = 4.6976e-02, PNorm = 69.9241, GNorm = 0.4723, lr_0 = 1.9452e-04
Loss = 4.8475e-02, PNorm = 69.9266, GNorm = 0.7036, lr_0 = 1.9438e-04
Loss = 3.8933e-02, PNorm = 69.9280, GNorm = 0.5311, lr_0 = 1.9425e-04
Loss = 4.4650e-02, PNorm = 69.9306, GNorm = 0.6775, lr_0 = 1.9412e-04
Loss = 4.8962e-02, PNorm = 69.9333, GNorm = 0.4430, lr_0 = 1.9398e-04
Loss = 4.1361e-02, PNorm = 69.9363, GNorm = 0.6647, lr_0 = 1.9385e-04
Loss = 4.4992e-02, PNorm = 69.9376, GNorm = 0.4863, lr_0 = 1.9372e-04
Loss = 4.9832e-02, PNorm = 69.9390, GNorm = 0.4919, lr_0 = 1.9359e-04
Loss = 4.2048e-02, PNorm = 69.9421, GNorm = 0.5816, lr_0 = 1.9345e-04
Loss = 4.6713e-02, PNorm = 69.9446, GNorm = 0.4408, lr_0 = 1.9332e-04
Loss = 4.3824e-02, PNorm = 69.9488, GNorm = 0.4637, lr_0 = 1.9319e-04
Loss = 4.4964e-02, PNorm = 69.9529, GNorm = 0.4858, lr_0 = 1.9306e-04
Validation mae = 0.388338
Epoch 22
Loss = 3.9520e-02, PNorm = 69.9574, GNorm = 0.8085, lr_0 = 1.9292e-04
Loss = 3.2215e-02, PNorm = 69.9614, GNorm = 0.3751, lr_0 = 1.9279e-04
Loss = 4.1572e-02, PNorm = 69.9649, GNorm = 0.6324, lr_0 = 1.9266e-04
Loss = 4.5057e-02, PNorm = 69.9692, GNorm = 0.5343, lr_0 = 1.9253e-04
Loss = 4.1115e-02, PNorm = 69.9728, GNorm = 0.5192, lr_0 = 1.9240e-04
Loss = 3.7197e-02, PNorm = 69.9755, GNorm = 0.5191, lr_0 = 1.9226e-04
Loss = 3.5831e-02, PNorm = 69.9787, GNorm = 0.5348, lr_0 = 1.9213e-04
Loss = 3.8054e-02, PNorm = 69.9808, GNorm = 0.4666, lr_0 = 1.9200e-04
Loss = 3.8015e-02, PNorm = 69.9827, GNorm = 0.7573, lr_0 = 1.9187e-04
Loss = 4.5185e-02, PNorm = 69.9855, GNorm = 0.5929, lr_0 = 1.9174e-04
Loss = 3.6230e-02, PNorm = 69.9898, GNorm = 0.4978, lr_0 = 1.9161e-04
Loss = 4.0714e-02, PNorm = 69.9940, GNorm = 0.4140, lr_0 = 1.9148e-04
Loss = 3.7557e-02, PNorm = 69.9958, GNorm = 0.5121, lr_0 = 1.9134e-04
Loss = 3.9930e-02, PNorm = 69.9980, GNorm = 0.5586, lr_0 = 1.9121e-04
Loss = 3.9535e-02, PNorm = 70.0015, GNorm = 0.4396, lr_0 = 1.9108e-04
Loss = 3.9634e-02, PNorm = 70.0048, GNorm = 0.4342, lr_0 = 1.9095e-04
Loss = 4.1580e-02, PNorm = 70.0107, GNorm = 0.6862, lr_0 = 1.9082e-04
Loss = 4.1749e-02, PNorm = 70.0148, GNorm = 0.3372, lr_0 = 1.9069e-04
Loss = 3.5746e-02, PNorm = 70.0194, GNorm = 0.5616, lr_0 = 1.9056e-04
Loss = 3.7119e-02, PNorm = 70.0221, GNorm = 0.5876, lr_0 = 1.9043e-04
Loss = 3.8703e-02, PNorm = 70.0247, GNorm = 0.5375, lr_0 = 1.9030e-04
Loss = 4.2013e-02, PNorm = 70.0286, GNorm = 0.5481, lr_0 = 1.9017e-04
Loss = 4.2151e-02, PNorm = 70.0304, GNorm = 0.7326, lr_0 = 1.9004e-04
Loss = 4.4610e-02, PNorm = 70.0330, GNorm = 0.6190, lr_0 = 1.8991e-04
Loss = 3.5513e-02, PNorm = 70.0372, GNorm = 0.4490, lr_0 = 1.8978e-04
Loss = 3.9202e-02, PNorm = 70.0410, GNorm = 0.8760, lr_0 = 1.8965e-04
Loss = 3.6424e-02, PNorm = 70.0444, GNorm = 0.6621, lr_0 = 1.8952e-04
Loss = 3.9224e-02, PNorm = 70.0480, GNorm = 0.4758, lr_0 = 1.8939e-04
Loss = 4.9732e-02, PNorm = 70.0513, GNorm = 0.6345, lr_0 = 1.8926e-04
Loss = 3.5801e-02, PNorm = 70.0552, GNorm = 0.8741, lr_0 = 1.8913e-04
Loss = 4.0129e-02, PNorm = 70.0578, GNorm = 0.4924, lr_0 = 1.8900e-04
Loss = 4.5075e-02, PNorm = 70.0619, GNorm = 0.7224, lr_0 = 1.8887e-04
Loss = 4.5569e-02, PNorm = 70.0667, GNorm = 0.4626, lr_0 = 1.8874e-04
Loss = 4.0746e-02, PNorm = 70.0696, GNorm = 0.4114, lr_0 = 1.8861e-04
Loss = 4.1706e-02, PNorm = 70.0739, GNorm = 0.3507, lr_0 = 1.8848e-04
Loss = 5.0367e-02, PNorm = 70.0781, GNorm = 0.6593, lr_0 = 1.8835e-04
Loss = 4.1453e-02, PNorm = 70.0826, GNorm = 0.6204, lr_0 = 1.8822e-04
Loss = 4.1628e-02, PNorm = 70.0861, GNorm = 0.4885, lr_0 = 1.8809e-04
Loss = 4.3384e-02, PNorm = 70.0897, GNorm = 0.4678, lr_0 = 1.8797e-04
Loss = 3.8895e-02, PNorm = 70.0930, GNorm = 0.5587, lr_0 = 1.8784e-04
Loss = 4.2329e-02, PNorm = 70.0955, GNorm = 0.3892, lr_0 = 1.8771e-04
Loss = 4.3958e-02, PNorm = 70.0975, GNorm = 0.5371, lr_0 = 1.8758e-04
Loss = 4.0209e-02, PNorm = 70.0999, GNorm = 0.6682, lr_0 = 1.8745e-04
Loss = 3.6444e-02, PNorm = 70.1026, GNorm = 0.7154, lr_0 = 1.8732e-04
Loss = 4.6232e-02, PNorm = 70.1043, GNorm = 0.5078, lr_0 = 1.8719e-04
Loss = 4.2726e-02, PNorm = 70.1067, GNorm = 0.4727, lr_0 = 1.8707e-04
Loss = 3.8832e-02, PNorm = 70.1099, GNorm = 0.6085, lr_0 = 1.8694e-04
Loss = 4.7469e-02, PNorm = 70.1134, GNorm = 0.7058, lr_0 = 1.8681e-04
Loss = 4.4746e-02, PNorm = 70.1166, GNorm = 0.4443, lr_0 = 1.8668e-04
Loss = 4.4103e-02, PNorm = 70.1209, GNorm = 0.3967, lr_0 = 1.8655e-04
Loss = 4.4884e-02, PNorm = 70.1240, GNorm = 0.5896, lr_0 = 1.8643e-04
Loss = 4.0532e-02, PNorm = 70.1262, GNorm = 0.4785, lr_0 = 1.8630e-04
Loss = 3.9308e-02, PNorm = 70.1283, GNorm = 0.8247, lr_0 = 1.8617e-04
Loss = 4.4138e-02, PNorm = 70.1306, GNorm = 0.5454, lr_0 = 1.8604e-04
Loss = 3.7377e-02, PNorm = 70.1321, GNorm = 0.4843, lr_0 = 1.8592e-04
Loss = 4.0734e-02, PNorm = 70.1336, GNorm = 0.4948, lr_0 = 1.8579e-04
Loss = 4.0225e-02, PNorm = 70.1359, GNorm = 0.4159, lr_0 = 1.8566e-04
Loss = 5.0039e-02, PNorm = 70.1382, GNorm = 0.4536, lr_0 = 1.8553e-04
Loss = 3.9300e-02, PNorm = 70.1418, GNorm = 0.6951, lr_0 = 1.8541e-04
Loss = 4.0684e-02, PNorm = 70.1435, GNorm = 0.6818, lr_0 = 1.8528e-04
Loss = 4.8474e-02, PNorm = 70.1478, GNorm = 0.6216, lr_0 = 1.8515e-04
Loss = 3.9483e-02, PNorm = 70.1512, GNorm = 0.5160, lr_0 = 1.8503e-04
Loss = 4.5209e-02, PNorm = 70.1542, GNorm = 0.6454, lr_0 = 1.8490e-04
Loss = 3.9801e-02, PNorm = 70.1573, GNorm = 0.6494, lr_0 = 1.8477e-04
Loss = 4.2496e-02, PNorm = 70.1597, GNorm = 0.7206, lr_0 = 1.8465e-04
Loss = 4.8222e-02, PNorm = 70.1625, GNorm = 0.4945, lr_0 = 1.8452e-04
Loss = 3.6273e-02, PNorm = 70.1659, GNorm = 0.5824, lr_0 = 1.8439e-04
Loss = 4.1667e-02, PNorm = 70.1711, GNorm = 0.4895, lr_0 = 1.8427e-04
Loss = 4.3826e-02, PNorm = 70.1759, GNorm = 0.6250, lr_0 = 1.8414e-04
Loss = 3.7238e-02, PNorm = 70.1787, GNorm = 0.4189, lr_0 = 1.8401e-04
Loss = 4.8704e-02, PNorm = 70.1801, GNorm = 0.7176, lr_0 = 1.8389e-04
Loss = 3.8821e-02, PNorm = 70.1803, GNorm = 0.4950, lr_0 = 1.8376e-04
Loss = 4.9140e-02, PNorm = 70.1810, GNorm = 0.4363, lr_0 = 1.8364e-04
Loss = 3.6669e-02, PNorm = 70.1824, GNorm = 0.4519, lr_0 = 1.8351e-04
Loss = 4.3257e-02, PNorm = 70.1853, GNorm = 0.5394, lr_0 = 1.8338e-04
Loss = 4.8618e-02, PNorm = 70.1883, GNorm = 0.4834, lr_0 = 1.8326e-04
Loss = 4.0846e-02, PNorm = 70.1910, GNorm = 0.5314, lr_0 = 1.8313e-04
Loss = 4.0080e-02, PNorm = 70.1918, GNorm = 0.4760, lr_0 = 1.8301e-04
Loss = 3.7005e-02, PNorm = 70.1936, GNorm = 0.4637, lr_0 = 1.8288e-04
Loss = 4.0376e-02, PNorm = 70.1959, GNorm = 0.7056, lr_0 = 1.8276e-04
Loss = 4.3385e-02, PNorm = 70.1992, GNorm = 0.4020, lr_0 = 1.8263e-04
Loss = 3.7682e-02, PNorm = 70.2016, GNorm = 0.4280, lr_0 = 1.8251e-04
Loss = 3.4904e-02, PNorm = 70.2041, GNorm = 0.4105, lr_0 = 1.8238e-04
Loss = 4.0526e-02, PNorm = 70.2078, GNorm = 0.5405, lr_0 = 1.8226e-04
Loss = 4.3343e-02, PNorm = 70.2113, GNorm = 0.5830, lr_0 = 1.8213e-04
Loss = 3.7349e-02, PNorm = 70.2143, GNorm = 0.5115, lr_0 = 1.8201e-04
Loss = 3.9520e-02, PNorm = 70.2155, GNorm = 0.5078, lr_0 = 1.8188e-04
Loss = 4.2511e-02, PNorm = 70.2163, GNorm = 0.5143, lr_0 = 1.8176e-04
Loss = 4.3043e-02, PNorm = 70.2188, GNorm = 0.4999, lr_0 = 1.8163e-04
Loss = 3.6726e-02, PNorm = 70.2203, GNorm = 0.4118, lr_0 = 1.8151e-04
Loss = 3.8549e-02, PNorm = 70.2231, GNorm = 0.6751, lr_0 = 1.8138e-04
Loss = 3.7384e-02, PNorm = 70.2262, GNorm = 0.4040, lr_0 = 1.8126e-04
Loss = 3.7685e-02, PNorm = 70.2287, GNorm = 0.5110, lr_0 = 1.8114e-04
Loss = 3.7419e-02, PNorm = 70.2315, GNorm = 0.5711, lr_0 = 1.8101e-04
Loss = 4.9693e-02, PNorm = 70.2339, GNorm = 0.5522, lr_0 = 1.8089e-04
Loss = 4.7002e-02, PNorm = 70.2367, GNorm = 0.5139, lr_0 = 1.8076e-04
Loss = 5.1989e-02, PNorm = 70.2377, GNorm = 0.5784, lr_0 = 1.8064e-04
Loss = 4.4838e-02, PNorm = 70.2401, GNorm = 0.6650, lr_0 = 1.8052e-04
Loss = 4.4509e-02, PNorm = 70.2428, GNorm = 0.8671, lr_0 = 1.8039e-04
Loss = 3.7454e-02, PNorm = 70.2466, GNorm = 0.4927, lr_0 = 1.8027e-04
Loss = 4.0963e-02, PNorm = 70.2482, GNorm = 0.5557, lr_0 = 1.8015e-04
Loss = 4.7444e-02, PNorm = 70.2516, GNorm = 0.4283, lr_0 = 1.8002e-04
Loss = 4.3830e-02, PNorm = 70.2541, GNorm = 0.5964, lr_0 = 1.7990e-04
Loss = 4.2661e-02, PNorm = 70.2564, GNorm = 0.5718, lr_0 = 1.7978e-04
Loss = 3.9610e-02, PNorm = 70.2579, GNorm = 0.4055, lr_0 = 1.7965e-04
Loss = 4.1817e-02, PNorm = 70.2592, GNorm = 0.7513, lr_0 = 1.7953e-04
Loss = 4.7531e-02, PNorm = 70.2635, GNorm = 0.7557, lr_0 = 1.7941e-04
Loss = 3.9800e-02, PNorm = 70.2684, GNorm = 0.7030, lr_0 = 1.7928e-04
Loss = 4.0788e-02, PNorm = 70.2719, GNorm = 0.5816, lr_0 = 1.7916e-04
Loss = 4.4287e-02, PNorm = 70.2728, GNorm = 0.4913, lr_0 = 1.7904e-04
Loss = 4.0059e-02, PNorm = 70.2739, GNorm = 0.4656, lr_0 = 1.7892e-04
Loss = 4.7154e-02, PNorm = 70.2759, GNorm = 0.5699, lr_0 = 1.7879e-04
Loss = 4.2700e-02, PNorm = 70.2773, GNorm = 0.4939, lr_0 = 1.7867e-04
Loss = 4.0436e-02, PNorm = 70.2803, GNorm = 0.4298, lr_0 = 1.7855e-04
Loss = 4.4954e-02, PNorm = 70.2851, GNorm = 0.5773, lr_0 = 1.7843e-04
Loss = 3.8510e-02, PNorm = 70.2874, GNorm = 0.5547, lr_0 = 1.7830e-04
Loss = 3.8928e-02, PNorm = 70.2873, GNorm = 0.6199, lr_0 = 1.7818e-04
Loss = 4.7407e-02, PNorm = 70.2884, GNorm = 0.4503, lr_0 = 1.7806e-04
Loss = 4.1069e-02, PNorm = 70.2902, GNorm = 0.4448, lr_0 = 1.7794e-04
Loss = 4.4021e-02, PNorm = 70.2936, GNorm = 0.4635, lr_0 = 1.7782e-04
Validation mae = 0.392622
Epoch 23
Loss = 3.7607e-02, PNorm = 70.2961, GNorm = 0.8976, lr_0 = 1.7769e-04
Loss = 4.0575e-02, PNorm = 70.3005, GNorm = 0.3304, lr_0 = 1.7757e-04
Loss = 3.3685e-02, PNorm = 70.3051, GNorm = 0.3767, lr_0 = 1.7745e-04
Loss = 4.2386e-02, PNorm = 70.3096, GNorm = 0.5426, lr_0 = 1.7733e-04
Loss = 3.5837e-02, PNorm = 70.3120, GNorm = 0.4877, lr_0 = 1.7721e-04
Loss = 3.4266e-02, PNorm = 70.3152, GNorm = 0.3309, lr_0 = 1.7709e-04
Loss = 3.7681e-02, PNorm = 70.3180, GNorm = 0.4724, lr_0 = 1.7696e-04
Loss = 3.6019e-02, PNorm = 70.3199, GNorm = 0.5133, lr_0 = 1.7684e-04
Loss = 3.7241e-02, PNorm = 70.3224, GNorm = 0.5342, lr_0 = 1.7672e-04
Loss = 3.2558e-02, PNorm = 70.3236, GNorm = 0.5236, lr_0 = 1.7660e-04
Loss = 3.6222e-02, PNorm = 70.3256, GNorm = 0.4227, lr_0 = 1.7648e-04
Loss = 3.2350e-02, PNorm = 70.3282, GNorm = 0.4228, lr_0 = 1.7636e-04
Loss = 3.9342e-02, PNorm = 70.3316, GNorm = 0.5204, lr_0 = 1.7624e-04
Loss = 3.7985e-02, PNorm = 70.3352, GNorm = 0.4444, lr_0 = 1.7612e-04
Loss = 3.2450e-02, PNorm = 70.3376, GNorm = 0.3622, lr_0 = 1.7600e-04
Loss = 3.0062e-02, PNorm = 70.3404, GNorm = 0.4971, lr_0 = 1.7588e-04
Loss = 3.6634e-02, PNorm = 70.3409, GNorm = 0.5914, lr_0 = 1.7576e-04
Loss = 3.7744e-02, PNorm = 70.3435, GNorm = 0.5919, lr_0 = 1.7564e-04
Loss = 4.7299e-02, PNorm = 70.3463, GNorm = 0.7728, lr_0 = 1.7552e-04
Loss = 4.2385e-02, PNorm = 70.3479, GNorm = 0.9184, lr_0 = 1.7540e-04
Loss = 4.1398e-02, PNorm = 70.3506, GNorm = 0.6503, lr_0 = 1.7528e-04
Loss = 3.9163e-02, PNorm = 70.3550, GNorm = 0.4037, lr_0 = 1.7516e-04
Loss = 3.2958e-02, PNorm = 70.3594, GNorm = 0.5684, lr_0 = 1.7504e-04
Loss = 4.1766e-02, PNorm = 70.3615, GNorm = 0.6364, lr_0 = 1.7492e-04
Loss = 3.3877e-02, PNorm = 70.3635, GNorm = 0.6355, lr_0 = 1.7480e-04
Loss = 3.8662e-02, PNorm = 70.3650, GNorm = 0.6971, lr_0 = 1.7468e-04
Loss = 3.2507e-02, PNorm = 70.3680, GNorm = 0.3659, lr_0 = 1.7456e-04
Loss = 3.4747e-02, PNorm = 70.3717, GNorm = 0.4676, lr_0 = 1.7444e-04
Loss = 3.2970e-02, PNorm = 70.3737, GNorm = 0.4433, lr_0 = 1.7432e-04
Loss = 3.7958e-02, PNorm = 70.3770, GNorm = 0.5281, lr_0 = 1.7420e-04
Loss = 3.0761e-02, PNorm = 70.3806, GNorm = 0.3615, lr_0 = 1.7408e-04
Loss = 4.7824e-02, PNorm = 70.3835, GNorm = 0.4749, lr_0 = 1.7396e-04
Loss = 4.0876e-02, PNorm = 70.3871, GNorm = 0.5655, lr_0 = 1.7384e-04
Loss = 3.5454e-02, PNorm = 70.3904, GNorm = 0.3946, lr_0 = 1.7372e-04
Loss = 3.6377e-02, PNorm = 70.3948, GNorm = 0.4763, lr_0 = 1.7360e-04
Loss = 3.9349e-02, PNorm = 70.3976, GNorm = 0.4627, lr_0 = 1.7348e-04
Loss = 3.5768e-02, PNorm = 70.3979, GNorm = 0.4458, lr_0 = 1.7336e-04
Loss = 4.2174e-02, PNorm = 70.4002, GNorm = 0.5238, lr_0 = 1.7325e-04
Loss = 3.6876e-02, PNorm = 70.4031, GNorm = 0.4041, lr_0 = 1.7313e-04
Loss = 4.0116e-02, PNorm = 70.4061, GNorm = 0.5748, lr_0 = 1.7301e-04
Loss = 3.9959e-02, PNorm = 70.4091, GNorm = 0.5281, lr_0 = 1.7289e-04
Loss = 3.9606e-02, PNorm = 70.4119, GNorm = 0.4885, lr_0 = 1.7277e-04
Loss = 3.6540e-02, PNorm = 70.4139, GNorm = 0.4640, lr_0 = 1.7265e-04
Loss = 4.3603e-02, PNorm = 70.4163, GNorm = 0.5473, lr_0 = 1.7253e-04
Loss = 3.6598e-02, PNorm = 70.4180, GNorm = 0.4064, lr_0 = 1.7242e-04
Loss = 3.7523e-02, PNorm = 70.4195, GNorm = 0.4659, lr_0 = 1.7230e-04
Loss = 4.0415e-02, PNorm = 70.4220, GNorm = 0.5383, lr_0 = 1.7218e-04
Loss = 4.2062e-02, PNorm = 70.4267, GNorm = 0.5302, lr_0 = 1.7206e-04
Loss = 3.6560e-02, PNorm = 70.4305, GNorm = 0.6577, lr_0 = 1.7194e-04
Loss = 3.7109e-02, PNorm = 70.4339, GNorm = 0.4847, lr_0 = 1.7183e-04
Loss = 3.7032e-02, PNorm = 70.4363, GNorm = 0.5216, lr_0 = 1.7171e-04
Loss = 4.9918e-02, PNorm = 70.4378, GNorm = 0.5442, lr_0 = 1.7159e-04
Loss = 4.1736e-02, PNorm = 70.4402, GNorm = 0.4690, lr_0 = 1.7147e-04
Loss = 3.4016e-02, PNorm = 70.4439, GNorm = 0.3567, lr_0 = 1.7136e-04
Loss = 4.2706e-02, PNorm = 70.4474, GNorm = 0.5726, lr_0 = 1.7124e-04
Loss = 3.4364e-02, PNorm = 70.4500, GNorm = 0.4119, lr_0 = 1.7112e-04
Loss = 4.2809e-02, PNorm = 70.4524, GNorm = 0.4775, lr_0 = 1.7100e-04
Loss = 3.7319e-02, PNorm = 70.4544, GNorm = 0.4255, lr_0 = 1.7089e-04
Loss = 4.2266e-02, PNorm = 70.4573, GNorm = 0.6466, lr_0 = 1.7077e-04
Loss = 3.7993e-02, PNorm = 70.4587, GNorm = 0.4164, lr_0 = 1.7065e-04
Loss = 3.5020e-02, PNorm = 70.4610, GNorm = 0.3683, lr_0 = 1.7054e-04
Loss = 4.3606e-02, PNorm = 70.4642, GNorm = 0.3879, lr_0 = 1.7042e-04
Loss = 3.4061e-02, PNorm = 70.4659, GNorm = 0.5132, lr_0 = 1.7030e-04
Loss = 3.7305e-02, PNorm = 70.4679, GNorm = 0.5118, lr_0 = 1.7019e-04
Loss = 3.7046e-02, PNorm = 70.4709, GNorm = 0.5908, lr_0 = 1.7007e-04
Loss = 4.3172e-02, PNorm = 70.4750, GNorm = 0.5320, lr_0 = 1.6995e-04
Loss = 3.6475e-02, PNorm = 70.4776, GNorm = 0.4102, lr_0 = 1.6984e-04
Loss = 4.1657e-02, PNorm = 70.4782, GNorm = 0.4330, lr_0 = 1.6972e-04
Loss = 4.0722e-02, PNorm = 70.4807, GNorm = 0.4732, lr_0 = 1.6960e-04
Loss = 3.9463e-02, PNorm = 70.4848, GNorm = 0.6620, lr_0 = 1.6949e-04
Loss = 4.0174e-02, PNorm = 70.4880, GNorm = 0.4609, lr_0 = 1.6937e-04
Loss = 3.7971e-02, PNorm = 70.4906, GNorm = 0.4203, lr_0 = 1.6926e-04
Loss = 3.3719e-02, PNorm = 70.4929, GNorm = 0.4654, lr_0 = 1.6914e-04
Loss = 3.8466e-02, PNorm = 70.4960, GNorm = 0.5033, lr_0 = 1.6902e-04
Loss = 3.5711e-02, PNorm = 70.4991, GNorm = 0.3822, lr_0 = 1.6891e-04
Loss = 3.6747e-02, PNorm = 70.5003, GNorm = 0.6587, lr_0 = 1.6879e-04
Loss = 4.0480e-02, PNorm = 70.5030, GNorm = 0.4949, lr_0 = 1.6868e-04
Loss = 3.5581e-02, PNorm = 70.5043, GNorm = 0.2815, lr_0 = 1.6856e-04
Loss = 3.8575e-02, PNorm = 70.5059, GNorm = 0.5277, lr_0 = 1.6845e-04
Loss = 4.0041e-02, PNorm = 70.5071, GNorm = 0.4954, lr_0 = 1.6833e-04
Loss = 4.0716e-02, PNorm = 70.5096, GNorm = 0.6151, lr_0 = 1.6821e-04
Loss = 4.2238e-02, PNorm = 70.5136, GNorm = 0.4477, lr_0 = 1.6810e-04
Loss = 4.1699e-02, PNorm = 70.5169, GNorm = 0.5145, lr_0 = 1.6798e-04
Loss = 4.2337e-02, PNorm = 70.5195, GNorm = 0.4632, lr_0 = 1.6787e-04
Loss = 3.6439e-02, PNorm = 70.5203, GNorm = 0.4219, lr_0 = 1.6775e-04
Loss = 4.2509e-02, PNorm = 70.5217, GNorm = 0.4432, lr_0 = 1.6764e-04
Loss = 3.5782e-02, PNorm = 70.5233, GNorm = 0.4251, lr_0 = 1.6752e-04
Loss = 3.7777e-02, PNorm = 70.5268, GNorm = 0.5827, lr_0 = 1.6741e-04
Loss = 4.2584e-02, PNorm = 70.5283, GNorm = 0.6007, lr_0 = 1.6729e-04
Loss = 4.7037e-02, PNorm = 70.5303, GNorm = 0.8211, lr_0 = 1.6718e-04
Loss = 4.2971e-02, PNorm = 70.5350, GNorm = 0.6202, lr_0 = 1.6707e-04
Loss = 3.6340e-02, PNorm = 70.5383, GNorm = 0.6767, lr_0 = 1.6695e-04
Loss = 4.0061e-02, PNorm = 70.5408, GNorm = 0.7316, lr_0 = 1.6684e-04
Loss = 4.2556e-02, PNorm = 70.5433, GNorm = 0.7687, lr_0 = 1.6672e-04
Loss = 3.7518e-02, PNorm = 70.5449, GNorm = 0.4263, lr_0 = 1.6661e-04
Loss = 3.7391e-02, PNorm = 70.5475, GNorm = 0.7193, lr_0 = 1.6649e-04
Loss = 3.9835e-02, PNorm = 70.5498, GNorm = 0.5389, lr_0 = 1.6638e-04
Loss = 4.2337e-02, PNorm = 70.5509, GNorm = 0.8026, lr_0 = 1.6627e-04
Loss = 4.8040e-02, PNorm = 70.5529, GNorm = 0.5368, lr_0 = 1.6615e-04
Loss = 4.0217e-02, PNorm = 70.5560, GNorm = 0.5559, lr_0 = 1.6604e-04
Loss = 4.0311e-02, PNorm = 70.5596, GNorm = 0.4225, lr_0 = 1.6592e-04
Loss = 5.7113e-02, PNorm = 70.5627, GNorm = 0.6261, lr_0 = 1.6581e-04
Loss = 3.8212e-02, PNorm = 70.5657, GNorm = 0.5545, lr_0 = 1.6570e-04
Loss = 4.1138e-02, PNorm = 70.5679, GNorm = 0.5319, lr_0 = 1.6558e-04
Loss = 4.2992e-02, PNorm = 70.5705, GNorm = 0.6050, lr_0 = 1.6547e-04
Loss = 4.8355e-02, PNorm = 70.5732, GNorm = 0.4994, lr_0 = 1.6536e-04
Loss = 4.1249e-02, PNorm = 70.5756, GNorm = 0.6119, lr_0 = 1.6524e-04
Loss = 3.9193e-02, PNorm = 70.5778, GNorm = 0.7042, lr_0 = 1.6513e-04
Loss = 4.6314e-02, PNorm = 70.5799, GNorm = 0.4554, lr_0 = 1.6502e-04
Loss = 3.3946e-02, PNorm = 70.5810, GNorm = 0.5503, lr_0 = 1.6490e-04
Loss = 4.1844e-02, PNorm = 70.5803, GNorm = 0.4113, lr_0 = 1.6479e-04
Loss = 3.8513e-02, PNorm = 70.5831, GNorm = 0.6052, lr_0 = 1.6468e-04
Loss = 4.1700e-02, PNorm = 70.5867, GNorm = 0.5247, lr_0 = 1.6457e-04
Loss = 3.8222e-02, PNorm = 70.5912, GNorm = 0.5630, lr_0 = 1.6445e-04
Loss = 3.9054e-02, PNorm = 70.5933, GNorm = 0.6303, lr_0 = 1.6434e-04
Loss = 4.2381e-02, PNorm = 70.5958, GNorm = 0.4108, lr_0 = 1.6423e-04
Loss = 3.9022e-02, PNorm = 70.5980, GNorm = 0.5027, lr_0 = 1.6412e-04
Loss = 4.2417e-02, PNorm = 70.6012, GNorm = 0.5869, lr_0 = 1.6400e-04
Loss = 3.9690e-02, PNorm = 70.6050, GNorm = 0.6081, lr_0 = 1.6389e-04
Loss = 4.0692e-02, PNorm = 70.6074, GNorm = 0.5302, lr_0 = 1.6378e-04
Validation mae = 0.390864
Epoch 24
Loss = 2.8964e-02, PNorm = 70.6091, GNorm = 0.3631, lr_0 = 1.6367e-04
Loss = 4.2635e-02, PNorm = 70.6113, GNorm = 0.5285, lr_0 = 1.6355e-04
Loss = 4.0361e-02, PNorm = 70.6141, GNorm = 0.4969, lr_0 = 1.6344e-04
Loss = 4.9590e-02, PNorm = 70.6181, GNorm = 0.5164, lr_0 = 1.6333e-04
Loss = 3.5321e-02, PNorm = 70.6213, GNorm = 0.6032, lr_0 = 1.6322e-04
Loss = 3.5184e-02, PNorm = 70.6240, GNorm = 0.4765, lr_0 = 1.6311e-04
Loss = 3.9246e-02, PNorm = 70.6282, GNorm = 0.3776, lr_0 = 1.6299e-04
Loss = 3.2136e-02, PNorm = 70.6317, GNorm = 0.4264, lr_0 = 1.6288e-04
Loss = 3.6549e-02, PNorm = 70.6341, GNorm = 0.4135, lr_0 = 1.6277e-04
Loss = 4.1023e-02, PNorm = 70.6360, GNorm = 1.1043, lr_0 = 1.6266e-04
Loss = 2.9201e-02, PNorm = 70.6388, GNorm = 0.3134, lr_0 = 1.6255e-04
Loss = 3.9578e-02, PNorm = 70.6416, GNorm = 1.0472, lr_0 = 1.6244e-04
Loss = 3.7962e-02, PNorm = 70.6455, GNorm = 0.7268, lr_0 = 1.6233e-04
Loss = 4.0617e-02, PNorm = 70.6481, GNorm = 0.4879, lr_0 = 1.6221e-04
Loss = 3.2131e-02, PNorm = 70.6506, GNorm = 0.4072, lr_0 = 1.6210e-04
Loss = 3.3815e-02, PNorm = 70.6535, GNorm = 0.4324, lr_0 = 1.6199e-04
Loss = 3.3705e-02, PNorm = 70.6560, GNorm = 0.4736, lr_0 = 1.6188e-04
Loss = 3.2862e-02, PNorm = 70.6596, GNorm = 0.4321, lr_0 = 1.6177e-04
Loss = 3.2049e-02, PNorm = 70.6609, GNorm = 0.4375, lr_0 = 1.6166e-04
Loss = 3.9281e-02, PNorm = 70.6646, GNorm = 0.5579, lr_0 = 1.6155e-04
Loss = 3.6246e-02, PNorm = 70.6680, GNorm = 0.4305, lr_0 = 1.6144e-04
Loss = 3.5823e-02, PNorm = 70.6701, GNorm = 0.5411, lr_0 = 1.6133e-04
Loss = 3.5980e-02, PNorm = 70.6715, GNorm = 0.5827, lr_0 = 1.6122e-04
Loss = 4.0457e-02, PNorm = 70.6735, GNorm = 0.4744, lr_0 = 1.6111e-04
Loss = 3.5163e-02, PNorm = 70.6772, GNorm = 0.4594, lr_0 = 1.6100e-04
Loss = 4.0703e-02, PNorm = 70.6798, GNorm = 0.4938, lr_0 = 1.6089e-04
Loss = 3.7962e-02, PNorm = 70.6832, GNorm = 0.4261, lr_0 = 1.6078e-04
Loss = 3.6398e-02, PNorm = 70.6865, GNorm = 0.4391, lr_0 = 1.6067e-04
Loss = 3.3616e-02, PNorm = 70.6890, GNorm = 0.4735, lr_0 = 1.6056e-04
Loss = 3.7294e-02, PNorm = 70.6903, GNorm = 0.5523, lr_0 = 1.6045e-04
Loss = 3.1816e-02, PNorm = 70.6921, GNorm = 0.5213, lr_0 = 1.6034e-04
Loss = 3.5239e-02, PNorm = 70.6944, GNorm = 0.4437, lr_0 = 1.6023e-04
Loss = 3.4523e-02, PNorm = 70.6976, GNorm = 0.4299, lr_0 = 1.6012e-04
Loss = 3.7211e-02, PNorm = 70.7007, GNorm = 0.5041, lr_0 = 1.6001e-04
Loss = 3.4631e-02, PNorm = 70.7032, GNorm = 0.4482, lr_0 = 1.5990e-04
Loss = 3.0748e-02, PNorm = 70.7048, GNorm = 0.4225, lr_0 = 1.5979e-04
Loss = 3.5417e-02, PNorm = 70.7062, GNorm = 0.5445, lr_0 = 1.5968e-04
Loss = 3.5513e-02, PNorm = 70.7083, GNorm = 0.4588, lr_0 = 1.5957e-04
Loss = 3.9163e-02, PNorm = 70.7118, GNorm = 0.4657, lr_0 = 1.5946e-04
Loss = 3.6973e-02, PNorm = 70.7135, GNorm = 0.5291, lr_0 = 1.5935e-04
Loss = 3.8395e-02, PNorm = 70.7153, GNorm = 0.5048, lr_0 = 1.5924e-04
Loss = 3.5613e-02, PNorm = 70.7164, GNorm = 0.5283, lr_0 = 1.5913e-04
Loss = 3.7917e-02, PNorm = 70.7170, GNorm = 0.3702, lr_0 = 1.5902e-04
Loss = 3.8093e-02, PNorm = 70.7188, GNorm = 0.5627, lr_0 = 1.5891e-04
Loss = 3.5178e-02, PNorm = 70.7208, GNorm = 0.6740, lr_0 = 1.5880e-04
Loss = 3.2166e-02, PNorm = 70.7227, GNorm = 0.5780, lr_0 = 1.5870e-04
Loss = 3.5392e-02, PNorm = 70.7252, GNorm = 0.4318, lr_0 = 1.5859e-04
Loss = 3.6576e-02, PNorm = 70.7281, GNorm = 0.6275, lr_0 = 1.5848e-04
Loss = 3.7387e-02, PNorm = 70.7319, GNorm = 0.5766, lr_0 = 1.5837e-04
Loss = 4.1676e-02, PNorm = 70.7353, GNorm = 0.4381, lr_0 = 1.5826e-04
Loss = 3.2756e-02, PNorm = 70.7375, GNorm = 0.4232, lr_0 = 1.5815e-04
Loss = 3.6060e-02, PNorm = 70.7385, GNorm = 0.5540, lr_0 = 1.5804e-04
Loss = 3.1516e-02, PNorm = 70.7400, GNorm = 0.5231, lr_0 = 1.5794e-04
Loss = 3.3961e-02, PNorm = 70.7429, GNorm = 0.5444, lr_0 = 1.5783e-04
Loss = 4.1377e-02, PNorm = 70.7454, GNorm = 0.3979, lr_0 = 1.5772e-04
Loss = 3.8101e-02, PNorm = 70.7482, GNorm = 0.6409, lr_0 = 1.5761e-04
Loss = 3.4970e-02, PNorm = 70.7493, GNorm = 0.3772, lr_0 = 1.5750e-04
Loss = 3.3415e-02, PNorm = 70.7520, GNorm = 0.5733, lr_0 = 1.5740e-04
Loss = 4.0782e-02, PNorm = 70.7532, GNorm = 0.4345, lr_0 = 1.5729e-04
Loss = 3.3645e-02, PNorm = 70.7560, GNorm = 0.3867, lr_0 = 1.5718e-04
Loss = 3.3176e-02, PNorm = 70.7597, GNorm = 0.3820, lr_0 = 1.5707e-04
Loss = 4.2101e-02, PNorm = 70.7624, GNorm = 0.5676, lr_0 = 1.5697e-04
Loss = 3.3301e-02, PNorm = 70.7650, GNorm = 0.3512, lr_0 = 1.5686e-04
Loss = 3.3499e-02, PNorm = 70.7674, GNorm = 0.4672, lr_0 = 1.5675e-04
Loss = 3.5973e-02, PNorm = 70.7703, GNorm = 0.5553, lr_0 = 1.5664e-04
Loss = 3.9661e-02, PNorm = 70.7737, GNorm = 0.4463, lr_0 = 1.5654e-04
Loss = 3.7737e-02, PNorm = 70.7759, GNorm = 0.6171, lr_0 = 1.5643e-04
Loss = 4.1436e-02, PNorm = 70.7777, GNorm = 0.5053, lr_0 = 1.5632e-04
Loss = 3.9714e-02, PNorm = 70.7794, GNorm = 0.5932, lr_0 = 1.5621e-04
Loss = 3.4373e-02, PNorm = 70.7810, GNorm = 0.6608, lr_0 = 1.5611e-04
Loss = 3.6879e-02, PNorm = 70.7840, GNorm = 0.3984, lr_0 = 1.5600e-04
Loss = 4.0123e-02, PNorm = 70.7867, GNorm = 0.5894, lr_0 = 1.5589e-04
Loss = 3.9512e-02, PNorm = 70.7905, GNorm = 0.8635, lr_0 = 1.5579e-04
Loss = 4.1839e-02, PNorm = 70.7922, GNorm = 0.5650, lr_0 = 1.5568e-04
Loss = 3.8402e-02, PNorm = 70.7943, GNorm = 0.4534, lr_0 = 1.5557e-04
Loss = 3.9619e-02, PNorm = 70.7966, GNorm = 0.7325, lr_0 = 1.5547e-04
Loss = 3.8345e-02, PNorm = 70.7982, GNorm = 0.6422, lr_0 = 1.5536e-04
Loss = 3.1930e-02, PNorm = 70.7999, GNorm = 0.4993, lr_0 = 1.5525e-04
Loss = 4.0610e-02, PNorm = 70.8018, GNorm = 0.5114, lr_0 = 1.5515e-04
Loss = 3.6173e-02, PNorm = 70.8032, GNorm = 0.4807, lr_0 = 1.5504e-04
Loss = 3.8005e-02, PNorm = 70.8051, GNorm = 0.6199, lr_0 = 1.5493e-04
Loss = 3.5503e-02, PNorm = 70.8068, GNorm = 0.4410, lr_0 = 1.5483e-04
Loss = 3.5775e-02, PNorm = 70.8085, GNorm = 0.6492, lr_0 = 1.5472e-04
Loss = 3.6815e-02, PNorm = 70.8105, GNorm = 0.4113, lr_0 = 1.5462e-04
Loss = 3.4795e-02, PNorm = 70.8116, GNorm = 0.5468, lr_0 = 1.5451e-04
Loss = 3.7285e-02, PNorm = 70.8125, GNorm = 0.3927, lr_0 = 1.5440e-04
Loss = 3.0304e-02, PNorm = 70.8150, GNorm = 0.7078, lr_0 = 1.5430e-04
Loss = 3.9640e-02, PNorm = 70.8163, GNorm = 0.5241, lr_0 = 1.5419e-04
Loss = 3.9794e-02, PNorm = 70.8184, GNorm = 0.8060, lr_0 = 1.5409e-04
Loss = 3.4795e-02, PNorm = 70.8203, GNorm = 0.6105, lr_0 = 1.5398e-04
Loss = 3.4468e-02, PNorm = 70.8228, GNorm = 0.3988, lr_0 = 1.5388e-04
Loss = 4.0491e-02, PNorm = 70.8259, GNorm = 0.5993, lr_0 = 1.5377e-04
Loss = 4.3867e-02, PNorm = 70.8285, GNorm = 0.7276, lr_0 = 1.5367e-04
Loss = 3.7479e-02, PNorm = 70.8309, GNorm = 0.6369, lr_0 = 1.5356e-04
Loss = 3.6859e-02, PNorm = 70.8331, GNorm = 0.5007, lr_0 = 1.5346e-04
Loss = 4.0185e-02, PNorm = 70.8349, GNorm = 0.6737, lr_0 = 1.5335e-04
Loss = 4.1071e-02, PNorm = 70.8377, GNorm = 0.4418, lr_0 = 1.5325e-04
Loss = 4.2647e-02, PNorm = 70.8410, GNorm = 0.7062, lr_0 = 1.5314e-04
Loss = 3.2983e-02, PNorm = 70.8438, GNorm = 0.4065, lr_0 = 1.5304e-04
Loss = 3.5193e-02, PNorm = 70.8459, GNorm = 0.4847, lr_0 = 1.5293e-04
Loss = 4.0705e-02, PNorm = 70.8473, GNorm = 0.5036, lr_0 = 1.5283e-04
Loss = 3.9962e-02, PNorm = 70.8494, GNorm = 0.4837, lr_0 = 1.5272e-04
Loss = 3.4252e-02, PNorm = 70.8515, GNorm = 0.4571, lr_0 = 1.5262e-04
Loss = 4.0543e-02, PNorm = 70.8534, GNorm = 0.6043, lr_0 = 1.5251e-04
Loss = 3.5837e-02, PNorm = 70.8550, GNorm = 0.4421, lr_0 = 1.5241e-04
Loss = 3.8512e-02, PNorm = 70.8571, GNorm = 0.6215, lr_0 = 1.5230e-04
Loss = 4.3066e-02, PNorm = 70.8595, GNorm = 0.5225, lr_0 = 1.5220e-04
Loss = 4.4603e-02, PNorm = 70.8617, GNorm = 0.5855, lr_0 = 1.5209e-04
Loss = 3.9068e-02, PNorm = 70.8643, GNorm = 0.3880, lr_0 = 1.5199e-04
Loss = 3.6373e-02, PNorm = 70.8663, GNorm = 0.4913, lr_0 = 1.5189e-04
Loss = 3.4913e-02, PNorm = 70.8681, GNorm = 0.5520, lr_0 = 1.5178e-04
Loss = 4.3695e-02, PNorm = 70.8711, GNorm = 0.4965, lr_0 = 1.5168e-04
Loss = 3.7740e-02, PNorm = 70.8743, GNorm = 0.6335, lr_0 = 1.5157e-04
Loss = 4.2426e-02, PNorm = 70.8751, GNorm = 0.5493, lr_0 = 1.5147e-04
Loss = 3.9799e-02, PNorm = 70.8753, GNorm = 0.5407, lr_0 = 1.5137e-04
Loss = 3.8200e-02, PNorm = 70.8766, GNorm = 0.5182, lr_0 = 1.5126e-04
Loss = 3.8511e-02, PNorm = 70.8780, GNorm = 0.5495, lr_0 = 1.5116e-04
Loss = 3.3041e-02, PNorm = 70.8796, GNorm = 0.4790, lr_0 = 1.5106e-04
Loss = 3.7465e-02, PNorm = 70.8811, GNorm = 0.4834, lr_0 = 1.5095e-04
Loss = 3.0761e-02, PNorm = 70.8821, GNorm = 0.6120, lr_0 = 1.5085e-04
Validation mae = 0.386693
Epoch 25
Loss = 3.5632e-02, PNorm = 70.8836, GNorm = 0.5572, lr_0 = 1.5075e-04
Loss = 3.4066e-02, PNorm = 70.8858, GNorm = 0.4362, lr_0 = 1.5064e-04
Loss = 3.1496e-02, PNorm = 70.8886, GNorm = 0.4856, lr_0 = 1.5054e-04
Loss = 2.9567e-02, PNorm = 70.8915, GNorm = 0.3526, lr_0 = 1.5044e-04
Loss = 3.5764e-02, PNorm = 70.8936, GNorm = 0.5990, lr_0 = 1.5033e-04
Loss = 3.2935e-02, PNorm = 70.8966, GNorm = 0.4509, lr_0 = 1.5023e-04
Loss = 2.9681e-02, PNorm = 70.8997, GNorm = 0.4954, lr_0 = 1.5013e-04
Loss = 3.2600e-02, PNorm = 70.9015, GNorm = 0.4592, lr_0 = 1.5002e-04
Loss = 3.4263e-02, PNorm = 70.9039, GNorm = 0.3668, lr_0 = 1.4992e-04
Loss = 3.6575e-02, PNorm = 70.9073, GNorm = 0.4232, lr_0 = 1.4982e-04
Loss = 3.1280e-02, PNorm = 70.9108, GNorm = 0.3506, lr_0 = 1.4972e-04
Loss = 3.4014e-02, PNorm = 70.9138, GNorm = 0.4483, lr_0 = 1.4961e-04
Loss = 3.0963e-02, PNorm = 70.9157, GNorm = 0.3646, lr_0 = 1.4951e-04
Loss = 3.4561e-02, PNorm = 70.9183, GNorm = 0.4814, lr_0 = 1.4941e-04
Loss = 3.0415e-02, PNorm = 70.9208, GNorm = 0.5056, lr_0 = 1.4931e-04
Loss = 3.0733e-02, PNorm = 70.9238, GNorm = 0.4483, lr_0 = 1.4920e-04
Loss = 2.7272e-02, PNorm = 70.9251, GNorm = 0.5317, lr_0 = 1.4910e-04
Loss = 3.1758e-02, PNorm = 70.9270, GNorm = 0.5396, lr_0 = 1.4900e-04
Loss = 3.6810e-02, PNorm = 70.9289, GNorm = 0.3929, lr_0 = 1.4890e-04
Loss = 3.8361e-02, PNorm = 70.9305, GNorm = 0.5639, lr_0 = 1.4880e-04
Loss = 3.6717e-02, PNorm = 70.9313, GNorm = 0.3458, lr_0 = 1.4869e-04
Loss = 3.7292e-02, PNorm = 70.9322, GNorm = 0.4754, lr_0 = 1.4859e-04
Loss = 3.3519e-02, PNorm = 70.9338, GNorm = 0.4443, lr_0 = 1.4849e-04
Loss = 3.6399e-02, PNorm = 70.9353, GNorm = 0.5619, lr_0 = 1.4839e-04
Loss = 3.5211e-02, PNorm = 70.9377, GNorm = 0.4766, lr_0 = 1.4829e-04
Loss = 3.5519e-02, PNorm = 70.9394, GNorm = 0.6534, lr_0 = 1.4818e-04
Loss = 3.7220e-02, PNorm = 70.9421, GNorm = 0.5647, lr_0 = 1.4808e-04
Loss = 3.0817e-02, PNorm = 70.9452, GNorm = 0.5317, lr_0 = 1.4798e-04
Loss = 4.3158e-02, PNorm = 70.9477, GNorm = 0.6191, lr_0 = 1.4788e-04
Loss = 3.7846e-02, PNorm = 70.9496, GNorm = 0.4302, lr_0 = 1.4778e-04
Loss = 3.8609e-02, PNorm = 70.9527, GNorm = 0.6628, lr_0 = 1.4768e-04
Loss = 3.4658e-02, PNorm = 70.9550, GNorm = 0.5039, lr_0 = 1.4758e-04
Loss = 3.2924e-02, PNorm = 70.9557, GNorm = 0.5815, lr_0 = 1.4748e-04
Loss = 3.0412e-02, PNorm = 70.9575, GNorm = 0.4788, lr_0 = 1.4737e-04
Loss = 3.3084e-02, PNorm = 70.9604, GNorm = 0.3666, lr_0 = 1.4727e-04
Loss = 3.6031e-02, PNorm = 70.9627, GNorm = 0.5687, lr_0 = 1.4717e-04
Loss = 3.7890e-02, PNorm = 70.9649, GNorm = 0.3505, lr_0 = 1.4707e-04
Loss = 3.5187e-02, PNorm = 70.9670, GNorm = 0.4262, lr_0 = 1.4697e-04
Loss = 3.4725e-02, PNorm = 70.9686, GNorm = 0.6929, lr_0 = 1.4687e-04
Loss = 3.4485e-02, PNorm = 70.9703, GNorm = 0.5645, lr_0 = 1.4677e-04
Loss = 3.5276e-02, PNorm = 70.9724, GNorm = 0.5571, lr_0 = 1.4667e-04
Loss = 3.4946e-02, PNorm = 70.9750, GNorm = 0.7329, lr_0 = 1.4657e-04
Loss = 3.2224e-02, PNorm = 70.9763, GNorm = 0.5661, lr_0 = 1.4647e-04
Loss = 3.7801e-02, PNorm = 70.9795, GNorm = 0.4727, lr_0 = 1.4637e-04
Loss = 3.8859e-02, PNorm = 70.9825, GNorm = 0.4424, lr_0 = 1.4627e-04
Loss = 3.3371e-02, PNorm = 70.9846, GNorm = 0.5251, lr_0 = 1.4617e-04
Loss = 4.2000e-02, PNorm = 70.9872, GNorm = 0.4738, lr_0 = 1.4607e-04
Loss = 3.2206e-02, PNorm = 70.9905, GNorm = 0.7427, lr_0 = 1.4597e-04
Loss = 3.1307e-02, PNorm = 70.9919, GNorm = 0.4715, lr_0 = 1.4587e-04
Loss = 3.9328e-02, PNorm = 70.9934, GNorm = 0.5245, lr_0 = 1.4577e-04
Loss = 2.8821e-02, PNorm = 70.9963, GNorm = 0.4479, lr_0 = 1.4567e-04
Loss = 3.6145e-02, PNorm = 70.9970, GNorm = 0.4806, lr_0 = 1.4557e-04
Loss = 2.9545e-02, PNorm = 70.9997, GNorm = 0.4838, lr_0 = 1.4547e-04
Loss = 3.3524e-02, PNorm = 71.0023, GNorm = 0.8010, lr_0 = 1.4537e-04
Loss = 3.8232e-02, PNorm = 71.0041, GNorm = 0.4437, lr_0 = 1.4527e-04
Loss = 3.7404e-02, PNorm = 71.0052, GNorm = 0.5303, lr_0 = 1.4517e-04
Loss = 3.0187e-02, PNorm = 71.0064, GNorm = 0.6403, lr_0 = 1.4507e-04
Loss = 3.4910e-02, PNorm = 71.0077, GNorm = 0.4844, lr_0 = 1.4497e-04
Loss = 3.2823e-02, PNorm = 71.0097, GNorm = 0.4025, lr_0 = 1.4487e-04
Loss = 2.9761e-02, PNorm = 71.0120, GNorm = 0.5965, lr_0 = 1.4477e-04
Loss = 3.7897e-02, PNorm = 71.0141, GNorm = 0.5038, lr_0 = 1.4467e-04
Loss = 4.0063e-02, PNorm = 71.0173, GNorm = 0.4354, lr_0 = 1.4457e-04
Loss = 3.6673e-02, PNorm = 71.0201, GNorm = 0.5250, lr_0 = 1.4447e-04
Loss = 3.9359e-02, PNorm = 71.0226, GNorm = 0.4538, lr_0 = 1.4438e-04
Loss = 3.4774e-02, PNorm = 71.0253, GNorm = 0.6664, lr_0 = 1.4428e-04
Loss = 3.2822e-02, PNorm = 71.0277, GNorm = 0.3656, lr_0 = 1.4418e-04
Loss = 3.2515e-02, PNorm = 71.0301, GNorm = 0.3372, lr_0 = 1.4408e-04
Loss = 3.6802e-02, PNorm = 71.0320, GNorm = 0.4750, lr_0 = 1.4398e-04
Loss = 3.0404e-02, PNorm = 71.0339, GNorm = 0.3798, lr_0 = 1.4388e-04
Loss = 3.2646e-02, PNorm = 71.0352, GNorm = 0.4845, lr_0 = 1.4378e-04
Loss = 3.6139e-02, PNorm = 71.0363, GNorm = 0.4128, lr_0 = 1.4368e-04
Loss = 3.5333e-02, PNorm = 71.0386, GNorm = 0.4952, lr_0 = 1.4359e-04
Loss = 3.6581e-02, PNorm = 71.0408, GNorm = 0.4718, lr_0 = 1.4349e-04
Loss = 3.5871e-02, PNorm = 71.0423, GNorm = 0.4531, lr_0 = 1.4339e-04
Loss = 3.1874e-02, PNorm = 71.0433, GNorm = 0.3635, lr_0 = 1.4329e-04
Loss = 4.6067e-02, PNorm = 71.0443, GNorm = 0.6618, lr_0 = 1.4319e-04
Loss = 3.2951e-02, PNorm = 71.0465, GNorm = 0.4010, lr_0 = 1.4310e-04
Loss = 3.6959e-02, PNorm = 71.0506, GNorm = 0.5060, lr_0 = 1.4300e-04
Loss = 4.0009e-02, PNorm = 71.0530, GNorm = 0.3700, lr_0 = 1.4290e-04
Loss = 3.6243e-02, PNorm = 71.0546, GNorm = 0.4995, lr_0 = 1.4280e-04
Loss = 3.5982e-02, PNorm = 71.0562, GNorm = 0.4208, lr_0 = 1.4270e-04
Loss = 3.0749e-02, PNorm = 71.0584, GNorm = 0.3411, lr_0 = 1.4261e-04
Loss = 4.0072e-02, PNorm = 71.0594, GNorm = 0.6114, lr_0 = 1.4251e-04
Loss = 3.3315e-02, PNorm = 71.0612, GNorm = 0.3077, lr_0 = 1.4241e-04
Loss = 3.3785e-02, PNorm = 71.0626, GNorm = 0.4151, lr_0 = 1.4231e-04
Loss = 3.1583e-02, PNorm = 71.0641, GNorm = 0.4390, lr_0 = 1.4222e-04
Loss = 4.1924e-02, PNorm = 71.0658, GNorm = 0.8765, lr_0 = 1.4212e-04
Loss = 3.5103e-02, PNorm = 71.0672, GNorm = 0.4371, lr_0 = 1.4202e-04
Loss = 3.3452e-02, PNorm = 71.0688, GNorm = 0.4300, lr_0 = 1.4192e-04
Loss = 4.1380e-02, PNorm = 71.0698, GNorm = 0.4908, lr_0 = 1.4183e-04
Loss = 4.0517e-02, PNorm = 71.0723, GNorm = 0.7377, lr_0 = 1.4173e-04
Loss = 3.3175e-02, PNorm = 71.0733, GNorm = 0.5198, lr_0 = 1.4163e-04
Loss = 3.5026e-02, PNorm = 71.0745, GNorm = 0.5481, lr_0 = 1.4153e-04
Loss = 3.5669e-02, PNorm = 71.0770, GNorm = 0.4102, lr_0 = 1.4144e-04
Loss = 3.1593e-02, PNorm = 71.0801, GNorm = 0.4231, lr_0 = 1.4134e-04
Loss = 3.6028e-02, PNorm = 71.0812, GNorm = 0.3852, lr_0 = 1.4124e-04
Loss = 3.7372e-02, PNorm = 71.0819, GNorm = 0.4127, lr_0 = 1.4115e-04
Loss = 3.9583e-02, PNorm = 71.0834, GNorm = 0.7298, lr_0 = 1.4105e-04
Loss = 2.9946e-02, PNorm = 71.0856, GNorm = 0.3652, lr_0 = 1.4095e-04
Loss = 3.7936e-02, PNorm = 71.0865, GNorm = 0.4664, lr_0 = 1.4086e-04
Loss = 3.8031e-02, PNorm = 71.0876, GNorm = 0.5397, lr_0 = 1.4076e-04
Loss = 3.9185e-02, PNorm = 71.0888, GNorm = 0.6965, lr_0 = 1.4066e-04
Loss = 3.6618e-02, PNorm = 71.0904, GNorm = 0.3319, lr_0 = 1.4057e-04
Loss = 3.1815e-02, PNorm = 71.0930, GNorm = 0.4321, lr_0 = 1.4047e-04
Loss = 3.7481e-02, PNorm = 71.0946, GNorm = 0.5585, lr_0 = 1.4038e-04
Loss = 3.5177e-02, PNorm = 71.0959, GNorm = 0.4850, lr_0 = 1.4028e-04
Loss = 4.0055e-02, PNorm = 71.0961, GNorm = 0.5989, lr_0 = 1.4018e-04
Loss = 4.1207e-02, PNorm = 71.0979, GNorm = 0.4784, lr_0 = 1.4009e-04
Loss = 3.5097e-02, PNorm = 71.1000, GNorm = 0.4384, lr_0 = 1.3999e-04
Loss = 3.9357e-02, PNorm = 71.1024, GNorm = 0.6740, lr_0 = 1.3990e-04
Loss = 3.1543e-02, PNorm = 71.1039, GNorm = 0.4290, lr_0 = 1.3980e-04
Loss = 3.6924e-02, PNorm = 71.1048, GNorm = 0.4551, lr_0 = 1.3970e-04
Loss = 3.3238e-02, PNorm = 71.1065, GNorm = 0.5447, lr_0 = 1.3961e-04
Loss = 3.7512e-02, PNorm = 71.1087, GNorm = 0.5541, lr_0 = 1.3951e-04
Loss = 3.4089e-02, PNorm = 71.1111, GNorm = 0.4911, lr_0 = 1.3942e-04
Loss = 3.2377e-02, PNorm = 71.1134, GNorm = 0.8109, lr_0 = 1.3932e-04
Loss = 4.0237e-02, PNorm = 71.1149, GNorm = 0.8081, lr_0 = 1.3923e-04
Loss = 4.1104e-02, PNorm = 71.1165, GNorm = 0.4764, lr_0 = 1.3913e-04
Loss = 3.4653e-02, PNorm = 71.1184, GNorm = 0.3931, lr_0 = 1.3904e-04
Loss = 3.6704e-02, PNorm = 71.1195, GNorm = 0.4248, lr_0 = 1.3894e-04
Validation mae = 0.390152
Epoch 26
Loss = 3.1829e-02, PNorm = 71.1222, GNorm = 0.4857, lr_0 = 1.3884e-04
Loss = 3.3478e-02, PNorm = 71.1248, GNorm = 0.5757, lr_0 = 1.3875e-04
Loss = 3.3198e-02, PNorm = 71.1268, GNorm = 0.4813, lr_0 = 1.3865e-04
Loss = 3.2281e-02, PNorm = 71.1305, GNorm = 0.3995, lr_0 = 1.3856e-04
Loss = 2.9352e-02, PNorm = 71.1329, GNorm = 0.9157, lr_0 = 1.3846e-04
Loss = 3.4848e-02, PNorm = 71.1348, GNorm = 0.5416, lr_0 = 1.3837e-04
Loss = 2.8491e-02, PNorm = 71.1369, GNorm = 0.4482, lr_0 = 1.3828e-04
Loss = 3.2320e-02, PNorm = 71.1388, GNorm = 0.4338, lr_0 = 1.3818e-04
Loss = 2.9490e-02, PNorm = 71.1412, GNorm = 0.5199, lr_0 = 1.3809e-04
Loss = 3.3615e-02, PNorm = 71.1433, GNorm = 0.4675, lr_0 = 1.3799e-04
Loss = 3.2956e-02, PNorm = 71.1449, GNorm = 0.5318, lr_0 = 1.3790e-04
Loss = 3.0157e-02, PNorm = 71.1465, GNorm = 0.4036, lr_0 = 1.3780e-04
Loss = 3.4900e-02, PNorm = 71.1480, GNorm = 0.6879, lr_0 = 1.3771e-04
Loss = 3.3019e-02, PNorm = 71.1509, GNorm = 0.4315, lr_0 = 1.3761e-04
Loss = 3.6151e-02, PNorm = 71.1533, GNorm = 0.4749, lr_0 = 1.3752e-04
Loss = 3.3225e-02, PNorm = 71.1552, GNorm = 0.5201, lr_0 = 1.3742e-04
Loss = 3.3009e-02, PNorm = 71.1564, GNorm = 0.4068, lr_0 = 1.3733e-04
Loss = 3.0362e-02, PNorm = 71.1578, GNorm = 0.4737, lr_0 = 1.3724e-04
Loss = 2.8936e-02, PNorm = 71.1597, GNorm = 0.4698, lr_0 = 1.3714e-04
Loss = 3.0187e-02, PNorm = 71.1610, GNorm = 0.5432, lr_0 = 1.3705e-04
Loss = 3.3694e-02, PNorm = 71.1624, GNorm = 0.6151, lr_0 = 1.3695e-04
Loss = 2.9004e-02, PNorm = 71.1652, GNorm = 0.4061, lr_0 = 1.3686e-04
Loss = 2.8950e-02, PNorm = 71.1664, GNorm = 0.3877, lr_0 = 1.3677e-04
Loss = 3.0878e-02, PNorm = 71.1692, GNorm = 0.4314, lr_0 = 1.3667e-04
Loss = 3.3539e-02, PNorm = 71.1717, GNorm = 0.5003, lr_0 = 1.3658e-04
Loss = 3.4433e-02, PNorm = 71.1743, GNorm = 0.4838, lr_0 = 1.3649e-04
Loss = 3.1275e-02, PNorm = 71.1762, GNorm = 0.4928, lr_0 = 1.3639e-04
Loss = 3.2982e-02, PNorm = 71.1787, GNorm = 0.4926, lr_0 = 1.3630e-04
Loss = 3.3329e-02, PNorm = 71.1806, GNorm = 0.7495, lr_0 = 1.3621e-04
Loss = 3.4102e-02, PNorm = 71.1823, GNorm = 0.4269, lr_0 = 1.3611e-04
Loss = 3.3449e-02, PNorm = 71.1845, GNorm = 0.4718, lr_0 = 1.3602e-04
Loss = 3.4613e-02, PNorm = 71.1865, GNorm = 0.8040, lr_0 = 1.3593e-04
Loss = 3.2948e-02, PNorm = 71.1901, GNorm = 0.6299, lr_0 = 1.3583e-04
Loss = 3.4365e-02, PNorm = 71.1926, GNorm = 0.3513, lr_0 = 1.3574e-04
Loss = 3.4886e-02, PNorm = 71.1944, GNorm = 0.3660, lr_0 = 1.3565e-04
Loss = 3.6842e-02, PNorm = 71.1975, GNorm = 0.4713, lr_0 = 1.3555e-04
Loss = 3.1531e-02, PNorm = 71.1994, GNorm = 0.4940, lr_0 = 1.3546e-04
Loss = 3.3308e-02, PNorm = 71.2008, GNorm = 0.4528, lr_0 = 1.3537e-04
Loss = 3.5007e-02, PNorm = 71.2021, GNorm = 0.3238, lr_0 = 1.3528e-04
Loss = 2.8390e-02, PNorm = 71.2040, GNorm = 0.6423, lr_0 = 1.3518e-04
Loss = 3.0596e-02, PNorm = 71.2058, GNorm = 0.4331, lr_0 = 1.3509e-04
Loss = 3.1008e-02, PNorm = 71.2071, GNorm = 0.3386, lr_0 = 1.3500e-04
Loss = 2.7876e-02, PNorm = 71.2077, GNorm = 0.6457, lr_0 = 1.3491e-04
Loss = 2.9515e-02, PNorm = 71.2084, GNorm = 0.4616, lr_0 = 1.3481e-04
Loss = 3.3021e-02, PNorm = 71.2094, GNorm = 0.4346, lr_0 = 1.3472e-04
Loss = 3.7804e-02, PNorm = 71.2117, GNorm = 0.5912, lr_0 = 1.3463e-04
Loss = 3.5948e-02, PNorm = 71.2136, GNorm = 0.4199, lr_0 = 1.3454e-04
Loss = 2.8548e-02, PNorm = 71.2157, GNorm = 0.4763, lr_0 = 1.3444e-04
Loss = 3.7189e-02, PNorm = 71.2173, GNorm = 0.4384, lr_0 = 1.3435e-04
Loss = 3.2422e-02, PNorm = 71.2201, GNorm = 0.7206, lr_0 = 1.3426e-04
Loss = 3.3588e-02, PNorm = 71.2221, GNorm = 0.5500, lr_0 = 1.3417e-04
Loss = 3.4081e-02, PNorm = 71.2247, GNorm = 0.4305, lr_0 = 1.3408e-04
Loss = 2.8726e-02, PNorm = 71.2274, GNorm = 0.3691, lr_0 = 1.3398e-04
Loss = 3.5175e-02, PNorm = 71.2295, GNorm = 0.7659, lr_0 = 1.3389e-04
Loss = 3.2805e-02, PNorm = 71.2319, GNorm = 0.3144, lr_0 = 1.3380e-04
Loss = 2.8543e-02, PNorm = 71.2335, GNorm = 0.5053, lr_0 = 1.3371e-04
Loss = 3.3620e-02, PNorm = 71.2342, GNorm = 0.6339, lr_0 = 1.3362e-04
Loss = 3.3212e-02, PNorm = 71.2356, GNorm = 0.6372, lr_0 = 1.3353e-04
Loss = 3.4096e-02, PNorm = 71.2371, GNorm = 0.6144, lr_0 = 1.3343e-04
Loss = 3.4604e-02, PNorm = 71.2382, GNorm = 0.6046, lr_0 = 1.3334e-04
Loss = 3.6983e-02, PNorm = 71.2396, GNorm = 0.4049, lr_0 = 1.3325e-04
Loss = 3.1820e-02, PNorm = 71.2430, GNorm = 0.3518, lr_0 = 1.3316e-04
Loss = 3.4465e-02, PNorm = 71.2450, GNorm = 0.4742, lr_0 = 1.3307e-04
Loss = 3.4262e-02, PNorm = 71.2470, GNorm = 0.6132, lr_0 = 1.3298e-04
Loss = 3.1708e-02, PNorm = 71.2483, GNorm = 0.5936, lr_0 = 1.3289e-04
Loss = 3.4252e-02, PNorm = 71.2509, GNorm = 0.5025, lr_0 = 1.3280e-04
Loss = 3.3251e-02, PNorm = 71.2518, GNorm = 0.6268, lr_0 = 1.3270e-04
Loss = 3.0531e-02, PNorm = 71.2526, GNorm = 0.5640, lr_0 = 1.3261e-04
Loss = 2.8355e-02, PNorm = 71.2536, GNorm = 0.4066, lr_0 = 1.3252e-04
Loss = 3.4014e-02, PNorm = 71.2543, GNorm = 0.3997, lr_0 = 1.3243e-04
Loss = 3.0983e-02, PNorm = 71.2570, GNorm = 0.4083, lr_0 = 1.3234e-04
Loss = 3.5841e-02, PNorm = 71.2587, GNorm = 0.5529, lr_0 = 1.3225e-04
Loss = 4.1783e-02, PNorm = 71.2598, GNorm = 0.6137, lr_0 = 1.3216e-04
Loss = 3.0521e-02, PNorm = 71.2608, GNorm = 0.4299, lr_0 = 1.3207e-04
Loss = 3.4526e-02, PNorm = 71.2619, GNorm = 0.5591, lr_0 = 1.3198e-04
Loss = 3.1901e-02, PNorm = 71.2636, GNorm = 0.4206, lr_0 = 1.3189e-04
Loss = 3.5591e-02, PNorm = 71.2662, GNorm = 0.5204, lr_0 = 1.3180e-04
Loss = 3.5390e-02, PNorm = 71.2678, GNorm = 0.5366, lr_0 = 1.3171e-04
Loss = 3.9326e-02, PNorm = 71.2702, GNorm = 0.4110, lr_0 = 1.3162e-04
Loss = 3.4532e-02, PNorm = 71.2721, GNorm = 0.4885, lr_0 = 1.3153e-04
Loss = 3.3221e-02, PNorm = 71.2734, GNorm = 0.5680, lr_0 = 1.3144e-04
Loss = 3.3666e-02, PNorm = 71.2757, GNorm = 0.4652, lr_0 = 1.3135e-04
Loss = 3.5227e-02, PNorm = 71.2783, GNorm = 0.3721, lr_0 = 1.3126e-04
Loss = 3.5934e-02, PNorm = 71.2811, GNorm = 0.4731, lr_0 = 1.3117e-04
Loss = 2.8147e-02, PNorm = 71.2840, GNorm = 0.4638, lr_0 = 1.3108e-04
Loss = 3.4897e-02, PNorm = 71.2858, GNorm = 0.4559, lr_0 = 1.3099e-04
Loss = 3.2755e-02, PNorm = 71.2874, GNorm = 0.5722, lr_0 = 1.3090e-04
Loss = 3.5291e-02, PNorm = 71.2884, GNorm = 0.4822, lr_0 = 1.3081e-04
Loss = 3.5444e-02, PNorm = 71.2900, GNorm = 0.4337, lr_0 = 1.3072e-04
Loss = 3.2974e-02, PNorm = 71.2919, GNorm = 0.4703, lr_0 = 1.3063e-04
Loss = 3.0152e-02, PNorm = 71.2928, GNorm = 0.3512, lr_0 = 1.3054e-04
Loss = 4.4008e-02, PNorm = 71.2938, GNorm = 0.6212, lr_0 = 1.3045e-04
Loss = 3.9603e-02, PNorm = 71.2968, GNorm = 0.4784, lr_0 = 1.3036e-04
Loss = 3.5651e-02, PNorm = 71.2971, GNorm = 0.4783, lr_0 = 1.3027e-04
Loss = 3.2780e-02, PNorm = 71.2978, GNorm = 0.5648, lr_0 = 1.3018e-04
Loss = 3.3891e-02, PNorm = 71.2999, GNorm = 0.5289, lr_0 = 1.3009e-04
Loss = 3.4210e-02, PNorm = 71.3018, GNorm = 0.5691, lr_0 = 1.3000e-04
Loss = 3.0145e-02, PNorm = 71.3023, GNorm = 0.4066, lr_0 = 1.2992e-04
Loss = 3.1403e-02, PNorm = 71.3036, GNorm = 0.4172, lr_0 = 1.2983e-04
Loss = 3.4927e-02, PNorm = 71.3049, GNorm = 0.5688, lr_0 = 1.2974e-04
Loss = 3.7179e-02, PNorm = 71.3068, GNorm = 0.4606, lr_0 = 1.2965e-04
Loss = 2.7083e-02, PNorm = 71.3092, GNorm = 0.3853, lr_0 = 1.2956e-04
Loss = 3.5638e-02, PNorm = 71.3115, GNorm = 0.5633, lr_0 = 1.2947e-04
Loss = 3.7842e-02, PNorm = 71.3126, GNorm = 0.5477, lr_0 = 1.2938e-04
Loss = 3.3842e-02, PNorm = 71.3147, GNorm = 0.5739, lr_0 = 1.2929e-04
Loss = 3.4443e-02, PNorm = 71.3171, GNorm = 0.5199, lr_0 = 1.2921e-04
Loss = 3.3424e-02, PNorm = 71.3198, GNorm = 0.5350, lr_0 = 1.2912e-04
Loss = 3.7027e-02, PNorm = 71.3218, GNorm = 0.5790, lr_0 = 1.2903e-04
Loss = 5.2649e-02, PNorm = 71.3235, GNorm = 0.8078, lr_0 = 1.2894e-04
Loss = 3.4767e-02, PNorm = 71.3251, GNorm = 0.4662, lr_0 = 1.2885e-04
Loss = 2.8955e-02, PNorm = 71.3268, GNorm = 0.5579, lr_0 = 1.2876e-04
Loss = 3.0490e-02, PNorm = 71.3279, GNorm = 0.4758, lr_0 = 1.2867e-04
Loss = 3.0119e-02, PNorm = 71.3285, GNorm = 0.4642, lr_0 = 1.2859e-04
Loss = 3.5200e-02, PNorm = 71.3300, GNorm = 0.6149, lr_0 = 1.2850e-04
Loss = 3.3005e-02, PNorm = 71.3327, GNorm = 0.6467, lr_0 = 1.2841e-04
Loss = 3.5508e-02, PNorm = 71.3343, GNorm = 0.3923, lr_0 = 1.2832e-04
Loss = 3.3578e-02, PNorm = 71.3359, GNorm = 0.5308, lr_0 = 1.2823e-04
Loss = 3.7240e-02, PNorm = 71.3375, GNorm = 0.4029, lr_0 = 1.2815e-04
Loss = 3.6735e-02, PNorm = 71.3404, GNorm = 0.5365, lr_0 = 1.2806e-04
Loss = 3.1928e-02, PNorm = 71.3422, GNorm = 0.5046, lr_0 = 1.2797e-04
Validation mae = 0.394859
Epoch 27
Loss = 2.7462e-02, PNorm = 71.3452, GNorm = 0.6532, lr_0 = 1.2788e-04
Loss = 3.0178e-02, PNorm = 71.3479, GNorm = 0.3300, lr_0 = 1.2780e-04
Loss = 3.3982e-02, PNorm = 71.3494, GNorm = 0.4013, lr_0 = 1.2771e-04
Loss = 3.2034e-02, PNorm = 71.3507, GNorm = 0.6926, lr_0 = 1.2762e-04
Loss = 3.9599e-02, PNorm = 71.3531, GNorm = 0.4484, lr_0 = 1.2753e-04
Loss = 3.0731e-02, PNorm = 71.3556, GNorm = 0.4425, lr_0 = 1.2745e-04
Loss = 2.6606e-02, PNorm = 71.3572, GNorm = 0.4047, lr_0 = 1.2736e-04
Loss = 3.0469e-02, PNorm = 71.3596, GNorm = 0.5420, lr_0 = 1.2727e-04
Loss = 3.3386e-02, PNorm = 71.3614, GNorm = 0.3486, lr_0 = 1.2718e-04
Loss = 3.0153e-02, PNorm = 71.3625, GNorm = 0.4893, lr_0 = 1.2710e-04
Loss = 3.3006e-02, PNorm = 71.3639, GNorm = 0.5484, lr_0 = 1.2701e-04
Loss = 2.7036e-02, PNorm = 71.3660, GNorm = 0.3844, lr_0 = 1.2692e-04
Loss = 2.7638e-02, PNorm = 71.3673, GNorm = 0.3606, lr_0 = 1.2684e-04
Loss = 3.2918e-02, PNorm = 71.3685, GNorm = 0.4246, lr_0 = 1.2675e-04
Loss = 2.5932e-02, PNorm = 71.3702, GNorm = 0.4335, lr_0 = 1.2666e-04
Loss = 2.8836e-02, PNorm = 71.3716, GNorm = 0.5396, lr_0 = 1.2658e-04
Loss = 2.9961e-02, PNorm = 71.3729, GNorm = 0.5785, lr_0 = 1.2649e-04
Loss = 3.0825e-02, PNorm = 71.3743, GNorm = 0.6909, lr_0 = 1.2640e-04
Loss = 2.8048e-02, PNorm = 71.3772, GNorm = 0.3668, lr_0 = 1.2632e-04
Loss = 2.4284e-02, PNorm = 71.3787, GNorm = 0.4447, lr_0 = 1.2623e-04
Loss = 3.2519e-02, PNorm = 71.3802, GNorm = 0.2812, lr_0 = 1.2614e-04
Loss = 3.0708e-02, PNorm = 71.3827, GNorm = 0.3695, lr_0 = 1.2606e-04
Loss = 3.5427e-02, PNorm = 71.3846, GNorm = 0.5061, lr_0 = 1.2597e-04
Loss = 3.2718e-02, PNorm = 71.3856, GNorm = 0.4559, lr_0 = 1.2588e-04
Loss = 3.3093e-02, PNorm = 71.3867, GNorm = 0.4613, lr_0 = 1.2580e-04
Loss = 3.8411e-02, PNorm = 71.3882, GNorm = 0.4145, lr_0 = 1.2571e-04
Loss = 2.8905e-02, PNorm = 71.3896, GNorm = 0.4102, lr_0 = 1.2563e-04
Loss = 3.2018e-02, PNorm = 71.3920, GNorm = 0.4149, lr_0 = 1.2554e-04
Loss = 2.9525e-02, PNorm = 71.3947, GNorm = 0.3867, lr_0 = 1.2545e-04
Loss = 3.0681e-02, PNorm = 71.3968, GNorm = 0.6416, lr_0 = 1.2537e-04
Loss = 3.1096e-02, PNorm = 71.3980, GNorm = 0.5147, lr_0 = 1.2528e-04
Loss = 3.1689e-02, PNorm = 71.3986, GNorm = 0.5034, lr_0 = 1.2520e-04
Loss = 3.3664e-02, PNorm = 71.4002, GNorm = 0.5242, lr_0 = 1.2511e-04
Loss = 3.2612e-02, PNorm = 71.4023, GNorm = 0.7700, lr_0 = 1.2502e-04
Loss = 3.4990e-02, PNorm = 71.4044, GNorm = 0.3975, lr_0 = 1.2494e-04
Loss = 3.6315e-02, PNorm = 71.4060, GNorm = 0.3689, lr_0 = 1.2485e-04
Loss = 3.1713e-02, PNorm = 71.4078, GNorm = 0.4094, lr_0 = 1.2477e-04
Loss = 2.8289e-02, PNorm = 71.4105, GNorm = 0.4729, lr_0 = 1.2468e-04
Loss = 2.7808e-02, PNorm = 71.4121, GNorm = 0.4182, lr_0 = 1.2460e-04
Loss = 3.1102e-02, PNorm = 71.4135, GNorm = 0.5307, lr_0 = 1.2451e-04
Loss = 2.8642e-02, PNorm = 71.4160, GNorm = 0.3763, lr_0 = 1.2443e-04
Loss = 3.1878e-02, PNorm = 71.4178, GNorm = 0.4318, lr_0 = 1.2434e-04
Loss = 2.7338e-02, PNorm = 71.4195, GNorm = 0.4763, lr_0 = 1.2426e-04
Loss = 3.6099e-02, PNorm = 71.4209, GNorm = 0.5148, lr_0 = 1.2417e-04
Loss = 2.9236e-02, PNorm = 71.4224, GNorm = 0.4074, lr_0 = 1.2409e-04
Loss = 3.2222e-02, PNorm = 71.4238, GNorm = 0.6050, lr_0 = 1.2400e-04
Loss = 3.4341e-02, PNorm = 71.4252, GNorm = 0.5047, lr_0 = 1.2392e-04
Loss = 2.9396e-02, PNorm = 71.4275, GNorm = 0.4235, lr_0 = 1.2383e-04
Loss = 3.3066e-02, PNorm = 71.4290, GNorm = 0.4971, lr_0 = 1.2375e-04
Loss = 3.6833e-02, PNorm = 71.4307, GNorm = 0.4117, lr_0 = 1.2366e-04
Loss = 3.0269e-02, PNorm = 71.4321, GNorm = 0.4579, lr_0 = 1.2358e-04
Loss = 3.5140e-02, PNorm = 71.4345, GNorm = 0.4834, lr_0 = 1.2349e-04
Loss = 2.8395e-02, PNorm = 71.4365, GNorm = 0.4559, lr_0 = 1.2341e-04
Loss = 3.4548e-02, PNorm = 71.4393, GNorm = 0.3805, lr_0 = 1.2332e-04
Loss = 2.8176e-02, PNorm = 71.4417, GNorm = 0.5291, lr_0 = 1.2324e-04
Loss = 2.9923e-02, PNorm = 71.4435, GNorm = 0.4118, lr_0 = 1.2315e-04
Loss = 2.9728e-02, PNorm = 71.4440, GNorm = 0.4191, lr_0 = 1.2307e-04
Loss = 2.4783e-02, PNorm = 71.4448, GNorm = 0.4220, lr_0 = 1.2298e-04
Loss = 3.3525e-02, PNorm = 71.4463, GNorm = 0.3964, lr_0 = 1.2290e-04
Loss = 3.0723e-02, PNorm = 71.4475, GNorm = 0.5237, lr_0 = 1.2282e-04
Loss = 3.3759e-02, PNorm = 71.4495, GNorm = 0.4460, lr_0 = 1.2273e-04
Loss = 2.9434e-02, PNorm = 71.4516, GNorm = 0.6582, lr_0 = 1.2265e-04
Loss = 3.2308e-02, PNorm = 71.4541, GNorm = 0.3499, lr_0 = 1.2256e-04
Loss = 2.8059e-02, PNorm = 71.4556, GNorm = 0.2699, lr_0 = 1.2248e-04
Loss = 3.2934e-02, PNorm = 71.4573, GNorm = 0.6472, lr_0 = 1.2240e-04
Loss = 3.6154e-02, PNorm = 71.4588, GNorm = 0.4142, lr_0 = 1.2231e-04
Loss = 3.1316e-02, PNorm = 71.4607, GNorm = 0.5953, lr_0 = 1.2223e-04
Loss = 3.0944e-02, PNorm = 71.4618, GNorm = 0.5073, lr_0 = 1.2214e-04
Loss = 3.2546e-02, PNorm = 71.4627, GNorm = 0.7678, lr_0 = 1.2206e-04
Loss = 3.4246e-02, PNorm = 71.4637, GNorm = 0.5373, lr_0 = 1.2198e-04
Loss = 4.4430e-02, PNorm = 71.4655, GNorm = 0.7971, lr_0 = 1.2189e-04
Loss = 3.1223e-02, PNorm = 71.4674, GNorm = 0.5830, lr_0 = 1.2181e-04
Loss = 3.6144e-02, PNorm = 71.4681, GNorm = 0.5623, lr_0 = 1.2173e-04
Loss = 2.8017e-02, PNorm = 71.4692, GNorm = 0.6134, lr_0 = 1.2164e-04
Loss = 3.1843e-02, PNorm = 71.4712, GNorm = 0.5627, lr_0 = 1.2156e-04
Loss = 2.9782e-02, PNorm = 71.4722, GNorm = 0.4796, lr_0 = 1.2148e-04
Loss = 3.0331e-02, PNorm = 71.4737, GNorm = 0.3296, lr_0 = 1.2139e-04
Loss = 3.1180e-02, PNorm = 71.4757, GNorm = 0.3126, lr_0 = 1.2131e-04
Loss = 3.6903e-02, PNorm = 71.4765, GNorm = 0.6081, lr_0 = 1.2123e-04
Loss = 3.2530e-02, PNorm = 71.4773, GNorm = 0.4493, lr_0 = 1.2114e-04
Loss = 3.4057e-02, PNorm = 71.4795, GNorm = 0.6197, lr_0 = 1.2106e-04
Loss = 3.4543e-02, PNorm = 71.4818, GNorm = 0.4293, lr_0 = 1.2098e-04
Loss = 2.6840e-02, PNorm = 71.4832, GNorm = 0.4292, lr_0 = 1.2090e-04
Loss = 2.9174e-02, PNorm = 71.4841, GNorm = 0.4538, lr_0 = 1.2081e-04
Loss = 3.1140e-02, PNorm = 71.4857, GNorm = 0.4316, lr_0 = 1.2073e-04
Loss = 3.2570e-02, PNorm = 71.4882, GNorm = 0.4489, lr_0 = 1.2065e-04
Loss = 3.5752e-02, PNorm = 71.4893, GNorm = 0.4773, lr_0 = 1.2056e-04
Loss = 2.9874e-02, PNorm = 71.4900, GNorm = 0.3639, lr_0 = 1.2048e-04
Loss = 3.8348e-02, PNorm = 71.4917, GNorm = 0.5489, lr_0 = 1.2040e-04
Loss = 2.8981e-02, PNorm = 71.4935, GNorm = 0.5761, lr_0 = 1.2032e-04
Loss = 3.4127e-02, PNorm = 71.4957, GNorm = 0.5991, lr_0 = 1.2023e-04
Loss = 3.1340e-02, PNorm = 71.4973, GNorm = 0.4145, lr_0 = 1.2015e-04
Loss = 3.1831e-02, PNorm = 71.4984, GNorm = 0.4200, lr_0 = 1.2007e-04
Loss = 3.0074e-02, PNorm = 71.5006, GNorm = 0.5320, lr_0 = 1.1999e-04
Loss = 3.3972e-02, PNorm = 71.5028, GNorm = 0.5418, lr_0 = 1.1991e-04
Loss = 3.1524e-02, PNorm = 71.5042, GNorm = 0.5075, lr_0 = 1.1982e-04
Loss = 3.2868e-02, PNorm = 71.5062, GNorm = 0.4994, lr_0 = 1.1974e-04
Loss = 3.3695e-02, PNorm = 71.5071, GNorm = 0.6895, lr_0 = 1.1966e-04
Loss = 3.1780e-02, PNorm = 71.5074, GNorm = 0.5339, lr_0 = 1.1958e-04
Loss = 3.1724e-02, PNorm = 71.5086, GNorm = 0.5230, lr_0 = 1.1950e-04
Loss = 3.4162e-02, PNorm = 71.5107, GNorm = 0.5291, lr_0 = 1.1941e-04
Loss = 3.3844e-02, PNorm = 71.5122, GNorm = 0.4144, lr_0 = 1.1933e-04
Loss = 3.8013e-02, PNorm = 71.5140, GNorm = 0.5007, lr_0 = 1.1925e-04
Loss = 3.5120e-02, PNorm = 71.5162, GNorm = 0.4708, lr_0 = 1.1917e-04
Loss = 3.1916e-02, PNorm = 71.5179, GNorm = 0.5927, lr_0 = 1.1909e-04
Loss = 4.1522e-02, PNorm = 71.5204, GNorm = 0.5369, lr_0 = 1.1901e-04
Loss = 3.2126e-02, PNorm = 71.5221, GNorm = 0.6296, lr_0 = 1.1892e-04
Loss = 3.2680e-02, PNorm = 71.5228, GNorm = 0.6082, lr_0 = 1.1884e-04
Loss = 2.8578e-02, PNorm = 71.5246, GNorm = 0.4008, lr_0 = 1.1876e-04
Loss = 3.2040e-02, PNorm = 71.5251, GNorm = 0.5035, lr_0 = 1.1868e-04
Loss = 4.1296e-02, PNorm = 71.5272, GNorm = 0.4334, lr_0 = 1.1860e-04
Loss = 3.5418e-02, PNorm = 71.5286, GNorm = 0.4832, lr_0 = 1.1852e-04
Loss = 3.3970e-02, PNorm = 71.5303, GNorm = 0.4447, lr_0 = 1.1844e-04
Loss = 3.1646e-02, PNorm = 71.5313, GNorm = 0.6052, lr_0 = 1.1835e-04
Loss = 3.3118e-02, PNorm = 71.5334, GNorm = 0.4693, lr_0 = 1.1827e-04
Loss = 3.2207e-02, PNorm = 71.5356, GNorm = 0.5615, lr_0 = 1.1819e-04
Loss = 3.6530e-02, PNorm = 71.5371, GNorm = 0.5324, lr_0 = 1.1811e-04
Loss = 3.1112e-02, PNorm = 71.5383, GNorm = 0.4939, lr_0 = 1.1803e-04
Loss = 3.1122e-02, PNorm = 71.5398, GNorm = 0.5887, lr_0 = 1.1795e-04
Loss = 3.0994e-02, PNorm = 71.5431, GNorm = 0.3846, lr_0 = 1.1787e-04
Validation mae = 0.388308
Epoch 28
Loss = 2.6943e-02, PNorm = 71.5446, GNorm = 0.3647, lr_0 = 1.1779e-04
Loss = 2.8503e-02, PNorm = 71.5457, GNorm = 0.5123, lr_0 = 1.1771e-04
Loss = 2.7907e-02, PNorm = 71.5474, GNorm = 0.6737, lr_0 = 1.1763e-04
Loss = 2.8538e-02, PNorm = 71.5486, GNorm = 0.4446, lr_0 = 1.1755e-04
Loss = 2.8899e-02, PNorm = 71.5507, GNorm = 0.5182, lr_0 = 1.1747e-04
Loss = 2.4411e-02, PNorm = 71.5526, GNorm = 0.4989, lr_0 = 1.1739e-04
Loss = 2.7763e-02, PNorm = 71.5540, GNorm = 0.4789, lr_0 = 1.1730e-04
Loss = 2.8529e-02, PNorm = 71.5555, GNorm = 0.4940, lr_0 = 1.1722e-04
Loss = 2.9059e-02, PNorm = 71.5576, GNorm = 0.4179, lr_0 = 1.1714e-04
Loss = 2.7721e-02, PNorm = 71.5581, GNorm = 0.3733, lr_0 = 1.1706e-04
Loss = 2.9096e-02, PNorm = 71.5587, GNorm = 0.3750, lr_0 = 1.1698e-04
Loss = 2.7713e-02, PNorm = 71.5595, GNorm = 0.5169, lr_0 = 1.1690e-04
Loss = 2.5023e-02, PNorm = 71.5613, GNorm = 0.5245, lr_0 = 1.1682e-04
Loss = 2.6143e-02, PNorm = 71.5628, GNorm = 0.4529, lr_0 = 1.1674e-04
Loss = 2.9815e-02, PNorm = 71.5649, GNorm = 0.3147, lr_0 = 1.1666e-04
Loss = 2.6531e-02, PNorm = 71.5666, GNorm = 0.4818, lr_0 = 1.1658e-04
Loss = 2.7815e-02, PNorm = 71.5679, GNorm = 0.5183, lr_0 = 1.1650e-04
Loss = 2.7546e-02, PNorm = 71.5697, GNorm = 0.5596, lr_0 = 1.1642e-04
Loss = 3.3351e-02, PNorm = 71.5701, GNorm = 0.8140, lr_0 = 1.1634e-04
Loss = 2.4782e-02, PNorm = 71.5718, GNorm = 0.3956, lr_0 = 1.1626e-04
Loss = 3.0367e-02, PNorm = 71.5742, GNorm = 0.5273, lr_0 = 1.1618e-04
Loss = 2.6158e-02, PNorm = 71.5772, GNorm = 0.4696, lr_0 = 1.1611e-04
Loss = 2.8431e-02, PNorm = 71.5783, GNorm = 0.4373, lr_0 = 1.1603e-04
Loss = 3.1168e-02, PNorm = 71.5796, GNorm = 0.5522, lr_0 = 1.1595e-04
Loss = 3.1900e-02, PNorm = 71.5807, GNorm = 0.4257, lr_0 = 1.1587e-04
Loss = 3.0940e-02, PNorm = 71.5824, GNorm = 0.4759, lr_0 = 1.1579e-04
Loss = 3.4334e-02, PNorm = 71.5837, GNorm = 0.6030, lr_0 = 1.1571e-04
Loss = 3.1082e-02, PNorm = 71.5846, GNorm = 0.4116, lr_0 = 1.1563e-04
Loss = 2.7852e-02, PNorm = 71.5860, GNorm = 0.5855, lr_0 = 1.1555e-04
Loss = 3.0810e-02, PNorm = 71.5891, GNorm = 0.5638, lr_0 = 1.1547e-04
Loss = 3.6132e-02, PNorm = 71.5902, GNorm = 0.4621, lr_0 = 1.1539e-04
Loss = 2.8919e-02, PNorm = 71.5921, GNorm = 0.4368, lr_0 = 1.1531e-04
Loss = 3.1616e-02, PNorm = 71.5957, GNorm = 0.7748, lr_0 = 1.1523e-04
Loss = 3.1546e-02, PNorm = 71.5968, GNorm = 0.4891, lr_0 = 1.1515e-04
Loss = 3.2585e-02, PNorm = 71.5976, GNorm = 0.6131, lr_0 = 1.1508e-04
Loss = 3.0575e-02, PNorm = 71.5990, GNorm = 0.4816, lr_0 = 1.1500e-04
Loss = 3.1205e-02, PNorm = 71.5994, GNorm = 0.5850, lr_0 = 1.1492e-04
Loss = 2.9161e-02, PNorm = 71.5998, GNorm = 0.4596, lr_0 = 1.1484e-04
Loss = 3.0640e-02, PNorm = 71.6018, GNorm = 0.3871, lr_0 = 1.1476e-04
Loss = 2.9489e-02, PNorm = 71.6038, GNorm = 0.4417, lr_0 = 1.1468e-04
Loss = 3.0136e-02, PNorm = 71.6050, GNorm = 0.6036, lr_0 = 1.1460e-04
Loss = 3.3410e-02, PNorm = 71.6070, GNorm = 0.6219, lr_0 = 1.1452e-04
Loss = 3.1325e-02, PNorm = 71.6088, GNorm = 0.6358, lr_0 = 1.1445e-04
Loss = 2.9765e-02, PNorm = 71.6104, GNorm = 0.5577, lr_0 = 1.1437e-04
Loss = 2.9785e-02, PNorm = 71.6122, GNorm = 0.4771, lr_0 = 1.1429e-04
Loss = 2.6529e-02, PNorm = 71.6141, GNorm = 0.4398, lr_0 = 1.1421e-04
Loss = 3.3081e-02, PNorm = 71.6159, GNorm = 0.3727, lr_0 = 1.1413e-04
Loss = 2.9231e-02, PNorm = 71.6182, GNorm = 0.4937, lr_0 = 1.1405e-04
Loss = 3.0817e-02, PNorm = 71.6191, GNorm = 0.6604, lr_0 = 1.1398e-04
Loss = 3.4469e-02, PNorm = 71.6209, GNorm = 0.7514, lr_0 = 1.1390e-04
Loss = 2.7394e-02, PNorm = 71.6217, GNorm = 0.3225, lr_0 = 1.1382e-04
Loss = 3.2156e-02, PNorm = 71.6222, GNorm = 0.4927, lr_0 = 1.1374e-04
Loss = 3.3530e-02, PNorm = 71.6240, GNorm = 0.6312, lr_0 = 1.1366e-04
Loss = 3.0126e-02, PNorm = 71.6266, GNorm = 0.4935, lr_0 = 1.1359e-04
Loss = 3.1593e-02, PNorm = 71.6278, GNorm = 0.3948, lr_0 = 1.1351e-04
Loss = 3.1405e-02, PNorm = 71.6283, GNorm = 0.3626, lr_0 = 1.1343e-04
Loss = 2.7594e-02, PNorm = 71.6293, GNorm = 0.3719, lr_0 = 1.1335e-04
Loss = 3.4209e-02, PNorm = 71.6307, GNorm = 0.4786, lr_0 = 1.1328e-04
Loss = 3.0325e-02, PNorm = 71.6312, GNorm = 0.4341, lr_0 = 1.1320e-04
Loss = 3.6367e-02, PNorm = 71.6315, GNorm = 0.4189, lr_0 = 1.1312e-04
Loss = 3.2246e-02, PNorm = 71.6328, GNorm = 0.4376, lr_0 = 1.1304e-04
Loss = 3.2421e-02, PNorm = 71.6347, GNorm = 0.4604, lr_0 = 1.1297e-04
Loss = 3.0154e-02, PNorm = 71.6359, GNorm = 0.5153, lr_0 = 1.1289e-04
Loss = 2.8734e-02, PNorm = 71.6367, GNorm = 0.4420, lr_0 = 1.1281e-04
Loss = 3.9922e-02, PNorm = 71.6383, GNorm = 0.5214, lr_0 = 1.1273e-04
Loss = 3.2376e-02, PNorm = 71.6406, GNorm = 0.5512, lr_0 = 1.1266e-04
Loss = 3.1634e-02, PNorm = 71.6430, GNorm = 0.3944, lr_0 = 1.1258e-04
Loss = 3.4314e-02, PNorm = 71.6461, GNorm = 0.4742, lr_0 = 1.1250e-04
Loss = 2.5770e-02, PNorm = 71.6486, GNorm = 0.4281, lr_0 = 1.1243e-04
Loss = 3.5863e-02, PNorm = 71.6497, GNorm = 0.5702, lr_0 = 1.1235e-04
Loss = 3.4572e-02, PNorm = 71.6512, GNorm = 0.4788, lr_0 = 1.1227e-04
Loss = 3.1575e-02, PNorm = 71.6530, GNorm = 0.3895, lr_0 = 1.1219e-04
Loss = 3.6554e-02, PNorm = 71.6540, GNorm = 0.7135, lr_0 = 1.1212e-04
Loss = 3.1097e-02, PNorm = 71.6558, GNorm = 0.4494, lr_0 = 1.1204e-04
Loss = 3.3419e-02, PNorm = 71.6579, GNorm = 0.4239, lr_0 = 1.1196e-04
Loss = 2.9072e-02, PNorm = 71.6593, GNorm = 0.5653, lr_0 = 1.1189e-04
Loss = 2.9743e-02, PNorm = 71.6612, GNorm = 0.4262, lr_0 = 1.1181e-04
Loss = 2.6049e-02, PNorm = 71.6626, GNorm = 0.4101, lr_0 = 1.1173e-04
Loss = 2.8446e-02, PNorm = 71.6632, GNorm = 0.6464, lr_0 = 1.1166e-04
Loss = 3.8784e-02, PNorm = 71.6642, GNorm = 0.6560, lr_0 = 1.1158e-04
Loss = 3.0006e-02, PNorm = 71.6654, GNorm = 0.4320, lr_0 = 1.1150e-04
Loss = 3.7704e-02, PNorm = 71.6675, GNorm = 0.5650, lr_0 = 1.1143e-04
Loss = 2.8518e-02, PNorm = 71.6693, GNorm = 0.4052, lr_0 = 1.1135e-04
Loss = 3.0960e-02, PNorm = 71.6706, GNorm = 0.3459, lr_0 = 1.1128e-04
Loss = 2.7265e-02, PNorm = 71.6719, GNorm = 0.4362, lr_0 = 1.1120e-04
Loss = 2.7327e-02, PNorm = 71.6736, GNorm = 0.3162, lr_0 = 1.1112e-04
Loss = 2.6398e-02, PNorm = 71.6751, GNorm = 0.4391, lr_0 = 1.1105e-04
Loss = 2.5702e-02, PNorm = 71.6768, GNorm = 0.3867, lr_0 = 1.1097e-04
Loss = 3.1519e-02, PNorm = 71.6787, GNorm = 0.4082, lr_0 = 1.1089e-04
Loss = 2.9116e-02, PNorm = 71.6801, GNorm = 0.4735, lr_0 = 1.1082e-04
Loss = 3.0175e-02, PNorm = 71.6809, GNorm = 0.7854, lr_0 = 1.1074e-04
Loss = 3.0107e-02, PNorm = 71.6814, GNorm = 0.3498, lr_0 = 1.1067e-04
Loss = 2.8961e-02, PNorm = 71.6826, GNorm = 0.4230, lr_0 = 1.1059e-04
Loss = 3.6546e-02, PNorm = 71.6841, GNorm = 0.4565, lr_0 = 1.1052e-04
Loss = 2.9591e-02, PNorm = 71.6856, GNorm = 0.5046, lr_0 = 1.1044e-04
Loss = 3.2375e-02, PNorm = 71.6865, GNorm = 0.6981, lr_0 = 1.1036e-04
Loss = 2.8980e-02, PNorm = 71.6884, GNorm = 0.4998, lr_0 = 1.1029e-04
Loss = 3.4909e-02, PNorm = 71.6893, GNorm = 0.4407, lr_0 = 1.1021e-04
Loss = 2.5417e-02, PNorm = 71.6899, GNorm = 0.3942, lr_0 = 1.1014e-04
Loss = 3.0454e-02, PNorm = 71.6911, GNorm = 0.2975, lr_0 = 1.1006e-04
Loss = 2.9435e-02, PNorm = 71.6922, GNorm = 0.4997, lr_0 = 1.0999e-04
Loss = 3.3695e-02, PNorm = 71.6935, GNorm = 0.5348, lr_0 = 1.0991e-04
Loss = 3.0603e-02, PNorm = 71.6946, GNorm = 0.5054, lr_0 = 1.0984e-04
Loss = 3.3271e-02, PNorm = 71.6957, GNorm = 0.7755, lr_0 = 1.0976e-04
Loss = 3.0809e-02, PNorm = 71.6980, GNorm = 0.3659, lr_0 = 1.0969e-04
Loss = 3.5137e-02, PNorm = 71.6999, GNorm = 0.6201, lr_0 = 1.0961e-04
Loss = 3.2434e-02, PNorm = 71.7021, GNorm = 0.4603, lr_0 = 1.0954e-04
Loss = 3.2981e-02, PNorm = 71.7037, GNorm = 0.8711, lr_0 = 1.0946e-04
Loss = 3.1387e-02, PNorm = 71.7038, GNorm = 0.9541, lr_0 = 1.0939e-04
Loss = 3.3981e-02, PNorm = 71.7047, GNorm = 0.3586, lr_0 = 1.0931e-04
Loss = 3.0701e-02, PNorm = 71.7068, GNorm = 0.4477, lr_0 = 1.0924e-04
Loss = 3.1208e-02, PNorm = 71.7083, GNorm = 0.5310, lr_0 = 1.0916e-04
Loss = 3.1715e-02, PNorm = 71.7094, GNorm = 0.4329, lr_0 = 1.0909e-04
Loss = 3.4212e-02, PNorm = 71.7105, GNorm = 0.5868, lr_0 = 1.0901e-04
Loss = 2.8655e-02, PNorm = 71.7121, GNorm = 0.4651, lr_0 = 1.0894e-04
Loss = 3.2132e-02, PNorm = 71.7136, GNorm = 0.4230, lr_0 = 1.0886e-04
Loss = 3.4680e-02, PNorm = 71.7147, GNorm = 0.5696, lr_0 = 1.0879e-04
Loss = 2.6326e-02, PNorm = 71.7166, GNorm = 0.4717, lr_0 = 1.0871e-04
Loss = 2.6989e-02, PNorm = 71.7185, GNorm = 0.3926, lr_0 = 1.0864e-04
Loss = 3.3973e-02, PNorm = 71.7194, GNorm = 0.5849, lr_0 = 1.0856e-04
Validation mae = 0.391395
Epoch 29
Loss = 2.2036e-02, PNorm = 71.7207, GNorm = 0.3514, lr_0 = 1.0849e-04
Loss = 2.7323e-02, PNorm = 71.7222, GNorm = 0.4572, lr_0 = 1.0841e-04
Loss = 2.7501e-02, PNorm = 71.7232, GNorm = 0.3392, lr_0 = 1.0834e-04
Loss = 2.5348e-02, PNorm = 71.7249, GNorm = 0.3881, lr_0 = 1.0827e-04
Loss = 2.8798e-02, PNorm = 71.7271, GNorm = 0.3923, lr_0 = 1.0819e-04
Loss = 2.8291e-02, PNorm = 71.7290, GNorm = 0.5155, lr_0 = 1.0812e-04
Loss = 2.5436e-02, PNorm = 71.7301, GNorm = 0.5209, lr_0 = 1.0804e-04
Loss = 2.8364e-02, PNorm = 71.7315, GNorm = 0.3949, lr_0 = 1.0797e-04
Loss = 2.5851e-02, PNorm = 71.7334, GNorm = 0.3859, lr_0 = 1.0790e-04
Loss = 3.4367e-02, PNorm = 71.7352, GNorm = 0.6097, lr_0 = 1.0782e-04
Loss = 2.7190e-02, PNorm = 71.7361, GNorm = 0.5061, lr_0 = 1.0775e-04
Loss = 2.8819e-02, PNorm = 71.7372, GNorm = 0.4420, lr_0 = 1.0767e-04
Loss = 3.0068e-02, PNorm = 71.7387, GNorm = 0.4348, lr_0 = 1.0760e-04
Loss = 2.6316e-02, PNorm = 71.7398, GNorm = 0.3167, lr_0 = 1.0753e-04
Loss = 2.4572e-02, PNorm = 71.7413, GNorm = 0.6515, lr_0 = 1.0745e-04
Loss = 3.4583e-02, PNorm = 71.7426, GNorm = 0.4092, lr_0 = 1.0738e-04
Loss = 2.1154e-02, PNorm = 71.7449, GNorm = 0.3242, lr_0 = 1.0731e-04
Loss = 3.6210e-02, PNorm = 71.7463, GNorm = 0.4675, lr_0 = 1.0723e-04
Loss = 2.7239e-02, PNorm = 71.7472, GNorm = 0.4004, lr_0 = 1.0716e-04
Loss = 2.9566e-02, PNorm = 71.7484, GNorm = 0.4515, lr_0 = 1.0709e-04
Loss = 3.0219e-02, PNorm = 71.7504, GNorm = 0.4654, lr_0 = 1.0701e-04
Loss = 3.5429e-02, PNorm = 71.7515, GNorm = 0.7387, lr_0 = 1.0694e-04
Loss = 3.1085e-02, PNorm = 71.7529, GNorm = 0.5875, lr_0 = 1.0687e-04
Loss = 2.9739e-02, PNorm = 71.7543, GNorm = 0.5498, lr_0 = 1.0679e-04
Loss = 3.4030e-02, PNorm = 71.7563, GNorm = 0.6250, lr_0 = 1.0672e-04
Loss = 2.6288e-02, PNorm = 71.7576, GNorm = 0.4901, lr_0 = 1.0665e-04
Loss = 2.6943e-02, PNorm = 71.7588, GNorm = 0.4078, lr_0 = 1.0657e-04
Loss = 2.7591e-02, PNorm = 71.7601, GNorm = 0.5629, lr_0 = 1.0650e-04
Loss = 2.8903e-02, PNorm = 71.7616, GNorm = 0.5467, lr_0 = 1.0643e-04
Loss = 3.4617e-02, PNorm = 71.7630, GNorm = 0.3800, lr_0 = 1.0635e-04
Loss = 2.4945e-02, PNorm = 71.7650, GNorm = 0.3886, lr_0 = 1.0628e-04
Loss = 2.7958e-02, PNorm = 71.7660, GNorm = 0.4505, lr_0 = 1.0621e-04
Loss = 3.2722e-02, PNorm = 71.7669, GNorm = 0.4448, lr_0 = 1.0614e-04
Loss = 2.9019e-02, PNorm = 71.7679, GNorm = 0.4978, lr_0 = 1.0606e-04
Loss = 2.5949e-02, PNorm = 71.7702, GNorm = 0.3327, lr_0 = 1.0599e-04
Loss = 2.8949e-02, PNorm = 71.7715, GNorm = 0.4704, lr_0 = 1.0592e-04
Loss = 3.0505e-02, PNorm = 71.7726, GNorm = 0.3919, lr_0 = 1.0585e-04
Loss = 2.4482e-02, PNorm = 71.7740, GNorm = 0.3611, lr_0 = 1.0577e-04
Loss = 3.1557e-02, PNorm = 71.7755, GNorm = 0.5719, lr_0 = 1.0570e-04
Loss = 2.8820e-02, PNorm = 71.7769, GNorm = 0.3444, lr_0 = 1.0563e-04
Loss = 2.6365e-02, PNorm = 71.7784, GNorm = 0.2969, lr_0 = 1.0556e-04
Loss = 2.8338e-02, PNorm = 71.7789, GNorm = 0.4380, lr_0 = 1.0548e-04
Loss = 2.7860e-02, PNorm = 71.7802, GNorm = 0.3130, lr_0 = 1.0541e-04
Loss = 2.6345e-02, PNorm = 71.7811, GNorm = 0.4366, lr_0 = 1.0534e-04
Loss = 2.5036e-02, PNorm = 71.7821, GNorm = 0.3975, lr_0 = 1.0527e-04
Loss = 2.9341e-02, PNorm = 71.7826, GNorm = 0.6122, lr_0 = 1.0519e-04
Loss = 3.0121e-02, PNorm = 71.7837, GNorm = 0.4771, lr_0 = 1.0512e-04
Loss = 2.8549e-02, PNorm = 71.7855, GNorm = 0.4677, lr_0 = 1.0505e-04
Loss = 2.5307e-02, PNorm = 71.7880, GNorm = 0.4235, lr_0 = 1.0498e-04
Loss = 2.5669e-02, PNorm = 71.7890, GNorm = 0.4274, lr_0 = 1.0491e-04
Loss = 2.6875e-02, PNorm = 71.7903, GNorm = 0.5371, lr_0 = 1.0483e-04
Loss = 2.1701e-02, PNorm = 71.7916, GNorm = 0.4541, lr_0 = 1.0476e-04
Loss = 2.9720e-02, PNorm = 71.7931, GNorm = 0.4070, lr_0 = 1.0469e-04
Loss = 2.4076e-02, PNorm = 71.7937, GNorm = 0.4031, lr_0 = 1.0462e-04
Loss = 2.9813e-02, PNorm = 71.7944, GNorm = 0.4596, lr_0 = 1.0455e-04
Loss = 2.3924e-02, PNorm = 71.7958, GNorm = 0.3451, lr_0 = 1.0448e-04
Loss = 2.8351e-02, PNorm = 71.7976, GNorm = 0.5753, lr_0 = 1.0440e-04
Loss = 3.0224e-02, PNorm = 71.7981, GNorm = 0.3936, lr_0 = 1.0433e-04
Loss = 2.6973e-02, PNorm = 71.7995, GNorm = 0.3828, lr_0 = 1.0426e-04
Loss = 3.0507e-02, PNorm = 71.8004, GNorm = 0.4324, lr_0 = 1.0419e-04
Loss = 2.5746e-02, PNorm = 71.8015, GNorm = 0.6185, lr_0 = 1.0412e-04
Loss = 3.0482e-02, PNorm = 71.8033, GNorm = 0.8490, lr_0 = 1.0405e-04
Loss = 3.0169e-02, PNorm = 71.8053, GNorm = 0.5696, lr_0 = 1.0398e-04
Loss = 3.1613e-02, PNorm = 71.8082, GNorm = 0.3679, lr_0 = 1.0391e-04
Loss = 3.3654e-02, PNorm = 71.8110, GNorm = 0.4252, lr_0 = 1.0383e-04
Loss = 3.0683e-02, PNorm = 71.8130, GNorm = 0.4895, lr_0 = 1.0376e-04
Loss = 3.2509e-02, PNorm = 71.8147, GNorm = 0.8175, lr_0 = 1.0369e-04
Loss = 2.7648e-02, PNorm = 71.8163, GNorm = 0.3980, lr_0 = 1.0362e-04
Loss = 2.8961e-02, PNorm = 71.8178, GNorm = 0.4463, lr_0 = 1.0355e-04
Loss = 2.8281e-02, PNorm = 71.8189, GNorm = 0.6341, lr_0 = 1.0348e-04
Loss = 3.0745e-02, PNorm = 71.8198, GNorm = 0.4119, lr_0 = 1.0341e-04
Loss = 3.1931e-02, PNorm = 71.8209, GNorm = 0.6899, lr_0 = 1.0334e-04
Loss = 3.8712e-02, PNorm = 71.8221, GNorm = 0.5977, lr_0 = 1.0327e-04
Loss = 2.9195e-02, PNorm = 71.8236, GNorm = 0.4126, lr_0 = 1.0320e-04
Loss = 3.1441e-02, PNorm = 71.8256, GNorm = 0.5956, lr_0 = 1.0312e-04
Loss = 2.8658e-02, PNorm = 71.8279, GNorm = 0.4291, lr_0 = 1.0305e-04
Loss = 2.9880e-02, PNorm = 71.8294, GNorm = 0.4731, lr_0 = 1.0298e-04
Loss = 2.7312e-02, PNorm = 71.8314, GNorm = 0.5215, lr_0 = 1.0291e-04
Loss = 3.1514e-02, PNorm = 71.8329, GNorm = 0.5842, lr_0 = 1.0284e-04
Loss = 3.1197e-02, PNorm = 71.8338, GNorm = 0.3967, lr_0 = 1.0277e-04
Loss = 2.6596e-02, PNorm = 71.8350, GNorm = 0.3986, lr_0 = 1.0270e-04
Loss = 2.6862e-02, PNorm = 71.8370, GNorm = 0.5170, lr_0 = 1.0263e-04
Loss = 2.8261e-02, PNorm = 71.8387, GNorm = 0.3865, lr_0 = 1.0256e-04
Loss = 3.1947e-02, PNorm = 71.8407, GNorm = 0.4350, lr_0 = 1.0249e-04
Loss = 2.7183e-02, PNorm = 71.8422, GNorm = 0.3436, lr_0 = 1.0242e-04
Loss = 3.1122e-02, PNorm = 71.8431, GNorm = 0.3432, lr_0 = 1.0235e-04
Loss = 3.6050e-02, PNorm = 71.8442, GNorm = 0.6031, lr_0 = 1.0228e-04
Loss = 3.0042e-02, PNorm = 71.8453, GNorm = 0.4324, lr_0 = 1.0221e-04
Loss = 2.9673e-02, PNorm = 71.8464, GNorm = 0.4040, lr_0 = 1.0214e-04
Loss = 3.0869e-02, PNorm = 71.8468, GNorm = 0.3943, lr_0 = 1.0207e-04
Loss = 2.9086e-02, PNorm = 71.8477, GNorm = 0.4300, lr_0 = 1.0200e-04
Loss = 3.1000e-02, PNorm = 71.8476, GNorm = 0.4593, lr_0 = 1.0193e-04
Loss = 2.8318e-02, PNorm = 71.8477, GNorm = 0.5630, lr_0 = 1.0186e-04
Loss = 2.7787e-02, PNorm = 71.8491, GNorm = 0.3844, lr_0 = 1.0179e-04
Loss = 3.0703e-02, PNorm = 71.8506, GNorm = 0.3855, lr_0 = 1.0172e-04
Loss = 2.9700e-02, PNorm = 71.8522, GNorm = 0.4947, lr_0 = 1.0165e-04
Loss = 3.0986e-02, PNorm = 71.8538, GNorm = 0.6121, lr_0 = 1.0158e-04
Loss = 2.7954e-02, PNorm = 71.8553, GNorm = 0.3459, lr_0 = 1.0151e-04
Loss = 3.3220e-02, PNorm = 71.8571, GNorm = 0.4500, lr_0 = 1.0144e-04
Loss = 2.6373e-02, PNorm = 71.8590, GNorm = 0.4019, lr_0 = 1.0137e-04
Loss = 2.8460e-02, PNorm = 71.8611, GNorm = 0.4384, lr_0 = 1.0130e-04
Loss = 3.1532e-02, PNorm = 71.8626, GNorm = 0.3471, lr_0 = 1.0123e-04
Loss = 2.7295e-02, PNorm = 71.8644, GNorm = 0.3629, lr_0 = 1.0116e-04
Loss = 3.0700e-02, PNorm = 71.8654, GNorm = 0.5957, lr_0 = 1.0110e-04
Loss = 2.7543e-02, PNorm = 71.8663, GNorm = 0.4312, lr_0 = 1.0103e-04
Loss = 3.6537e-02, PNorm = 71.8676, GNorm = 0.7026, lr_0 = 1.0096e-04
Loss = 2.9088e-02, PNorm = 71.8690, GNorm = 0.3791, lr_0 = 1.0089e-04
Loss = 3.3272e-02, PNorm = 71.8707, GNorm = 0.4221, lr_0 = 1.0082e-04
Loss = 2.8606e-02, PNorm = 71.8717, GNorm = 0.6438, lr_0 = 1.0075e-04
Loss = 3.1009e-02, PNorm = 71.8726, GNorm = 0.6005, lr_0 = 1.0068e-04
Loss = 3.1226e-02, PNorm = 71.8736, GNorm = 0.3619, lr_0 = 1.0061e-04
Loss = 3.4186e-02, PNorm = 71.8754, GNorm = 0.4673, lr_0 = 1.0054e-04
Loss = 3.3065e-02, PNorm = 71.8773, GNorm = 0.6533, lr_0 = 1.0047e-04
Loss = 2.7279e-02, PNorm = 71.8792, GNorm = 0.4261, lr_0 = 1.0041e-04
Loss = 2.6824e-02, PNorm = 71.8807, GNorm = 0.3323, lr_0 = 1.0034e-04
Loss = 3.4227e-02, PNorm = 71.8812, GNorm = 0.6240, lr_0 = 1.0027e-04
Loss = 2.7909e-02, PNorm = 71.8817, GNorm = 0.4845, lr_0 = 1.0020e-04
Loss = 2.6360e-02, PNorm = 71.8821, GNorm = 0.5190, lr_0 = 1.0013e-04
Loss = 3.3427e-02, PNorm = 71.8823, GNorm = 0.7317, lr_0 = 1.0006e-04
Loss = 3.0934e-02, PNorm = 71.8823, GNorm = 0.4759, lr_0 = 1.0000e-04
Validation mae = 0.390807
Model 0 best validation mae = 0.386677 on epoch 18
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.383287
Ensemble test mae = 0.383287
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 8.9645e-01, PNorm = 38.3715, GNorm = 3.8565, lr_0 = 1.0413e-04
Loss = 8.7164e-01, PNorm = 38.3737, GNorm = 0.8335, lr_0 = 1.0788e-04
Loss = 7.5163e-01, PNorm = 38.3763, GNorm = 1.6546, lr_0 = 1.1163e-04
Loss = 7.7758e-01, PNorm = 38.3794, GNorm = 1.4094, lr_0 = 1.1537e-04
Loss = 6.1546e-01, PNorm = 38.3826, GNorm = 9.4156, lr_0 = 1.1913e-04
Loss = 7.4865e-01, PNorm = 38.3861, GNorm = 16.3390, lr_0 = 1.2287e-04
Loss = 6.9937e-01, PNorm = 38.3896, GNorm = 6.5974, lr_0 = 1.2663e-04
Loss = 6.0731e-01, PNorm = 38.3939, GNorm = 4.5851, lr_0 = 1.3038e-04
Loss = 6.2485e-01, PNorm = 38.3993, GNorm = 3.5646, lr_0 = 1.3413e-04
Loss = 7.3671e-01, PNorm = 38.4043, GNorm = 2.1524, lr_0 = 1.3788e-04
Loss = 6.1617e-01, PNorm = 38.4100, GNorm = 2.5468, lr_0 = 1.4163e-04
Loss = 5.8124e-01, PNorm = 38.4171, GNorm = 3.8306, lr_0 = 1.4537e-04
Loss = 6.3931e-01, PNorm = 38.4231, GNorm = 9.1971, lr_0 = 1.4913e-04
Loss = 6.2196e-01, PNorm = 38.4307, GNorm = 7.0363, lr_0 = 1.5288e-04
Loss = 5.3854e-01, PNorm = 38.4381, GNorm = 5.0646, lr_0 = 1.5662e-04
Loss = 5.1125e-01, PNorm = 38.4456, GNorm = 9.8858, lr_0 = 1.6038e-04
Loss = 4.7593e-01, PNorm = 38.4515, GNorm = 2.2043, lr_0 = 1.6412e-04
Loss = 4.7145e-01, PNorm = 38.4567, GNorm = 3.5119, lr_0 = 1.6788e-04
Loss = 4.8028e-01, PNorm = 38.4632, GNorm = 7.0157, lr_0 = 1.7163e-04
Loss = 4.3521e-01, PNorm = 38.4716, GNorm = 13.2487, lr_0 = 1.7538e-04
Loss = 4.3820e-01, PNorm = 38.4791, GNorm = 7.9365, lr_0 = 1.7913e-04
Loss = 4.7642e-01, PNorm = 38.4854, GNorm = 13.8554, lr_0 = 1.8288e-04
Loss = 4.1383e-01, PNorm = 38.4917, GNorm = 14.6935, lr_0 = 1.8662e-04
Loss = 4.2653e-01, PNorm = 38.4988, GNorm = 8.1699, lr_0 = 1.9038e-04
Loss = 4.2423e-01, PNorm = 38.5061, GNorm = 11.7880, lr_0 = 1.9413e-04
Loss = 3.7216e-01, PNorm = 38.5120, GNorm = 2.2308, lr_0 = 1.9788e-04
Loss = 4.3383e-01, PNorm = 38.5177, GNorm = 15.3211, lr_0 = 2.0163e-04
Loss = 3.7208e-01, PNorm = 38.5242, GNorm = 20.7312, lr_0 = 2.0537e-04
Loss = 3.9788e-01, PNorm = 38.5269, GNorm = 15.0781, lr_0 = 2.0913e-04
Loss = 3.9173e-01, PNorm = 38.5333, GNorm = 10.5213, lr_0 = 2.1288e-04
Loss = 4.2185e-01, PNorm = 38.5397, GNorm = 6.1096, lr_0 = 2.1663e-04
Loss = 3.4314e-01, PNorm = 38.5451, GNorm = 2.8584, lr_0 = 2.2038e-04
Loss = 3.5883e-01, PNorm = 38.5534, GNorm = 2.7468, lr_0 = 2.2412e-04
Loss = 3.5513e-01, PNorm = 38.5611, GNorm = 7.8865, lr_0 = 2.2787e-04
Loss = 3.6566e-01, PNorm = 38.5677, GNorm = 3.0928, lr_0 = 2.3163e-04
Loss = 3.5552e-01, PNorm = 38.5743, GNorm = 12.8551, lr_0 = 2.3538e-04
Loss = 4.0955e-01, PNorm = 38.5776, GNorm = 16.1484, lr_0 = 2.3913e-04
Loss = 3.5644e-01, PNorm = 38.5829, GNorm = 7.3492, lr_0 = 2.4288e-04
Loss = 3.4259e-01, PNorm = 38.5886, GNorm = 9.1144, lr_0 = 2.4662e-04
Loss = 3.4787e-01, PNorm = 38.5939, GNorm = 4.3969, lr_0 = 2.5038e-04
Loss = 3.0709e-01, PNorm = 38.5986, GNorm = 4.6739, lr_0 = 2.5413e-04
Loss = 3.4086e-01, PNorm = 38.6044, GNorm = 6.8848, lr_0 = 2.5788e-04
Loss = 3.7382e-01, PNorm = 38.6097, GNorm = 7.6398, lr_0 = 2.6163e-04
Loss = 3.6792e-01, PNorm = 38.6157, GNorm = 10.6038, lr_0 = 2.6537e-04
Loss = 3.6290e-01, PNorm = 38.6227, GNorm = 6.2911, lr_0 = 2.6912e-04
Loss = 3.7460e-01, PNorm = 38.6331, GNorm = 4.5832, lr_0 = 2.7288e-04
Loss = 3.0482e-01, PNorm = 38.6440, GNorm = 8.1760, lr_0 = 2.7663e-04
Loss = 3.2838e-01, PNorm = 38.6503, GNorm = 11.0937, lr_0 = 2.8038e-04
Loss = 3.6733e-01, PNorm = 38.6557, GNorm = 12.4067, lr_0 = 2.8413e-04
Loss = 3.4416e-01, PNorm = 38.6643, GNorm = 7.6551, lr_0 = 2.8787e-04
Loss = 3.8334e-01, PNorm = 38.6701, GNorm = 3.6624, lr_0 = 2.9163e-04
Loss = 3.3563e-01, PNorm = 38.6755, GNorm = 10.0214, lr_0 = 2.9538e-04
Loss = 3.1842e-01, PNorm = 38.6852, GNorm = 9.6723, lr_0 = 2.9913e-04
Loss = 3.4077e-01, PNorm = 38.6964, GNorm = 2.2597, lr_0 = 3.0288e-04
Loss = 2.9113e-01, PNorm = 38.7050, GNorm = 2.3996, lr_0 = 3.0662e-04
Loss = 3.1074e-01, PNorm = 38.7094, GNorm = 10.9006, lr_0 = 3.1037e-04
Loss = 3.2833e-01, PNorm = 38.7149, GNorm = 1.5005, lr_0 = 3.1413e-04
Loss = 3.4270e-01, PNorm = 38.7218, GNorm = 2.6503, lr_0 = 3.1788e-04
Loss = 2.8521e-01, PNorm = 38.7286, GNorm = 5.6114, lr_0 = 3.2163e-04
Loss = 3.4455e-01, PNorm = 38.7362, GNorm = 3.0549, lr_0 = 3.2538e-04
Loss = 2.9160e-01, PNorm = 38.7435, GNorm = 4.9883, lr_0 = 3.2912e-04
Loss = 3.1957e-01, PNorm = 38.7496, GNorm = 2.2787, lr_0 = 3.3288e-04
Loss = 3.6628e-01, PNorm = 38.7550, GNorm = 9.2315, lr_0 = 3.3663e-04
Loss = 2.9491e-01, PNorm = 38.7627, GNorm = 4.2519, lr_0 = 3.4038e-04
Loss = 3.2551e-01, PNorm = 38.7717, GNorm = 1.6187, lr_0 = 3.4413e-04
Loss = 3.3146e-01, PNorm = 38.7768, GNorm = 6.7532, lr_0 = 3.4787e-04
Loss = 2.9188e-01, PNorm = 38.7825, GNorm = 7.7879, lr_0 = 3.5162e-04
Loss = 2.8491e-01, PNorm = 38.7890, GNorm = 7.0887, lr_0 = 3.5538e-04
Loss = 3.7453e-01, PNorm = 38.7929, GNorm = 14.7649, lr_0 = 3.5913e-04
Loss = 3.4580e-01, PNorm = 38.8001, GNorm = 3.6805, lr_0 = 3.6288e-04
Loss = 3.0389e-01, PNorm = 38.8099, GNorm = 6.4929, lr_0 = 3.6662e-04
Loss = 3.1090e-01, PNorm = 38.8183, GNorm = 5.7535, lr_0 = 3.7037e-04
Loss = 3.2392e-01, PNorm = 38.8257, GNorm = 6.8045, lr_0 = 3.7413e-04
Loss = 3.0967e-01, PNorm = 38.8340, GNorm = 5.1397, lr_0 = 3.7788e-04
Loss = 3.3609e-01, PNorm = 38.8454, GNorm = 1.6786, lr_0 = 3.8163e-04
Loss = 2.7327e-01, PNorm = 38.8564, GNorm = 4.2151, lr_0 = 3.8537e-04
Loss = 3.0264e-01, PNorm = 38.8653, GNorm = 11.1624, lr_0 = 3.8912e-04
Loss = 2.8069e-01, PNorm = 38.8739, GNorm = 2.7590, lr_0 = 3.9287e-04
Loss = 2.8686e-01, PNorm = 38.8844, GNorm = 1.6119, lr_0 = 3.9663e-04
Loss = 2.9268e-01, PNorm = 38.8944, GNorm = 8.7911, lr_0 = 4.0038e-04
Loss = 3.1052e-01, PNorm = 38.9033, GNorm = 16.9221, lr_0 = 4.0413e-04
Loss = 3.0110e-01, PNorm = 38.9114, GNorm = 5.3427, lr_0 = 4.0787e-04
Loss = 2.6160e-01, PNorm = 38.9190, GNorm = 2.7251, lr_0 = 4.1162e-04
Loss = 2.5979e-01, PNorm = 38.9265, GNorm = 2.9010, lr_0 = 4.1537e-04
Loss = 3.1778e-01, PNorm = 38.9348, GNorm = 3.3244, lr_0 = 4.1913e-04
Loss = 3.2299e-01, PNorm = 38.9501, GNorm = 3.4324, lr_0 = 4.2288e-04
Loss = 3.0667e-01, PNorm = 38.9602, GNorm = 1.4867, lr_0 = 4.2662e-04
Loss = 2.9548e-01, PNorm = 38.9724, GNorm = 14.6027, lr_0 = 4.3037e-04
Loss = 2.8588e-01, PNorm = 38.9806, GNorm = 9.7609, lr_0 = 4.3412e-04
Loss = 3.1377e-01, PNorm = 38.9924, GNorm = 5.2039, lr_0 = 4.3788e-04
Loss = 2.5584e-01, PNorm = 39.0052, GNorm = 2.0460, lr_0 = 4.4163e-04
Loss = 2.7400e-01, PNorm = 39.0174, GNorm = 10.1966, lr_0 = 4.4538e-04
Loss = 2.8210e-01, PNorm = 39.0277, GNorm = 17.5431, lr_0 = 4.4912e-04
Loss = 2.9176e-01, PNorm = 39.0407, GNorm = 2.6489, lr_0 = 4.5287e-04
Loss = 2.7884e-01, PNorm = 39.0505, GNorm = 8.2195, lr_0 = 4.5662e-04
Loss = 3.1865e-01, PNorm = 39.0605, GNorm = 1.4935, lr_0 = 4.6038e-04
Loss = 2.9768e-01, PNorm = 39.0740, GNorm = 3.4713, lr_0 = 4.6413e-04
Loss = 3.2264e-01, PNorm = 39.0853, GNorm = 14.5405, lr_0 = 4.6787e-04
Loss = 2.7737e-01, PNorm = 39.0926, GNorm = 0.9652, lr_0 = 4.7162e-04
Loss = 2.9097e-01, PNorm = 39.1062, GNorm = 1.2197, lr_0 = 4.7537e-04
Loss = 3.2740e-01, PNorm = 39.1179, GNorm = 13.7732, lr_0 = 4.7913e-04
Loss = 2.5989e-01, PNorm = 39.1301, GNorm = 3.5900, lr_0 = 4.8288e-04
Loss = 2.4698e-01, PNorm = 39.1393, GNorm = 1.3513, lr_0 = 4.8663e-04
Loss = 2.7368e-01, PNorm = 39.1476, GNorm = 6.6344, lr_0 = 4.9038e-04
Loss = 2.7822e-01, PNorm = 39.1608, GNorm = 11.3548, lr_0 = 4.9412e-04
Loss = 2.5914e-01, PNorm = 39.1745, GNorm = 12.8017, lr_0 = 4.9788e-04
Loss = 3.3844e-01, PNorm = 39.1864, GNorm = 14.1008, lr_0 = 5.0163e-04
Loss = 2.8076e-01, PNorm = 39.1982, GNorm = 5.5319, lr_0 = 5.0538e-04
Loss = 2.9882e-01, PNorm = 39.2117, GNorm = 9.7544, lr_0 = 5.0913e-04
Loss = 2.8112e-01, PNorm = 39.2237, GNorm = 4.0252, lr_0 = 5.1287e-04
Loss = 3.4394e-01, PNorm = 39.2362, GNorm = 11.7748, lr_0 = 5.1663e-04
Loss = 2.8468e-01, PNorm = 39.2514, GNorm = 2.8563, lr_0 = 5.2038e-04
Loss = 2.4906e-01, PNorm = 39.2623, GNorm = 7.4575, lr_0 = 5.2413e-04
Loss = 3.5795e-01, PNorm = 39.2726, GNorm = 14.4643, lr_0 = 5.2788e-04
Loss = 3.2000e-01, PNorm = 39.2907, GNorm = 7.9470, lr_0 = 5.3162e-04
Loss = 2.5920e-01, PNorm = 39.3105, GNorm = 3.6786, lr_0 = 5.3538e-04
Loss = 2.9507e-01, PNorm = 39.3259, GNorm = 1.2971, lr_0 = 5.3912e-04
Loss = 2.4723e-01, PNorm = 39.3386, GNorm = 1.7763, lr_0 = 5.4288e-04
Loss = 2.9338e-01, PNorm = 39.3510, GNorm = 4.0096, lr_0 = 5.4663e-04
Loss = 2.3978e-01, PNorm = 39.3638, GNorm = 1.1560, lr_0 = 5.5038e-04
Validation mae = 0.635622
Epoch 1
Loss = 2.5849e-01, PNorm = 39.3771, GNorm = 6.2306, lr_0 = 5.5413e-04
Loss = 2.6287e-01, PNorm = 39.3955, GNorm = 5.8944, lr_0 = 5.5787e-04
Loss = 2.6483e-01, PNorm = 39.4060, GNorm = 1.9755, lr_0 = 5.6163e-04
Loss = 2.2287e-01, PNorm = 39.4187, GNorm = 7.0800, lr_0 = 5.6538e-04
Loss = 2.4929e-01, PNorm = 39.4298, GNorm = 3.8935, lr_0 = 5.6913e-04
Loss = 2.8793e-01, PNorm = 39.4437, GNorm = 2.0288, lr_0 = 5.7288e-04
Loss = 3.0587e-01, PNorm = 39.4608, GNorm = 3.8814, lr_0 = 5.7662e-04
Loss = 2.6748e-01, PNorm = 39.4777, GNorm = 1.4127, lr_0 = 5.8038e-04
Loss = 2.9278e-01, PNorm = 39.4892, GNorm = 8.1772, lr_0 = 5.8413e-04
Loss = 2.4481e-01, PNorm = 39.5082, GNorm = 1.9438, lr_0 = 5.8788e-04
Loss = 2.8318e-01, PNorm = 39.5268, GNorm = 9.6333, lr_0 = 5.9163e-04
Loss = 3.1720e-01, PNorm = 39.5432, GNorm = 12.5791, lr_0 = 5.9538e-04
Loss = 2.7373e-01, PNorm = 39.5629, GNorm = 4.4013, lr_0 = 5.9913e-04
Loss = 2.5399e-01, PNorm = 39.5851, GNorm = 2.2553, lr_0 = 6.0288e-04
Loss = 2.8285e-01, PNorm = 39.6074, GNorm = 3.0610, lr_0 = 6.0663e-04
Loss = 2.2099e-01, PNorm = 39.6232, GNorm = 1.9265, lr_0 = 6.1038e-04
Loss = 2.4504e-01, PNorm = 39.6398, GNorm = 5.2855, lr_0 = 6.1413e-04
Loss = 2.8080e-01, PNorm = 39.6510, GNorm = 2.1329, lr_0 = 6.1788e-04
Loss = 2.3799e-01, PNorm = 39.6592, GNorm = 1.2916, lr_0 = 6.2163e-04
Loss = 2.3482e-01, PNorm = 39.6722, GNorm = 5.9996, lr_0 = 6.2538e-04
Loss = 2.7764e-01, PNorm = 39.6846, GNorm = 1.2806, lr_0 = 6.2913e-04
Loss = 3.1366e-01, PNorm = 39.7010, GNorm = 11.6721, lr_0 = 6.3288e-04
Loss = 3.2772e-01, PNorm = 39.7201, GNorm = 10.3890, lr_0 = 6.3663e-04
Loss = 2.4825e-01, PNorm = 39.7422, GNorm = 2.3929, lr_0 = 6.4038e-04
Loss = 3.2041e-01, PNorm = 39.7631, GNorm = 6.0421, lr_0 = 6.4413e-04
Loss = 2.2182e-01, PNorm = 39.7860, GNorm = 3.9213, lr_0 = 6.4788e-04
Loss = 2.3052e-01, PNorm = 39.8055, GNorm = 7.8657, lr_0 = 6.5163e-04
Loss = 2.4336e-01, PNorm = 39.8179, GNorm = 1.2387, lr_0 = 6.5538e-04
Loss = 2.8436e-01, PNorm = 39.8368, GNorm = 4.4034, lr_0 = 6.5913e-04
Loss = 2.6861e-01, PNorm = 39.8552, GNorm = 4.3101, lr_0 = 6.6288e-04
Loss = 2.4931e-01, PNorm = 39.8756, GNorm = 7.0621, lr_0 = 6.6663e-04
Loss = 2.4710e-01, PNorm = 39.8905, GNorm = 1.4159, lr_0 = 6.7038e-04
Loss = 2.3830e-01, PNorm = 39.9065, GNorm = 3.1718, lr_0 = 6.7413e-04
Loss = 2.5500e-01, PNorm = 39.9250, GNorm = 4.6720, lr_0 = 6.7788e-04
Loss = 2.4727e-01, PNorm = 39.9400, GNorm = 2.8358, lr_0 = 6.8163e-04
Loss = 3.1678e-01, PNorm = 39.9612, GNorm = 6.5195, lr_0 = 6.8538e-04
Loss = 3.1479e-01, PNorm = 39.9840, GNorm = 8.1725, lr_0 = 6.8913e-04
Loss = 2.4427e-01, PNorm = 40.0077, GNorm = 1.7149, lr_0 = 6.9288e-04
Loss = 3.3260e-01, PNorm = 40.0299, GNorm = 3.0246, lr_0 = 6.9663e-04
Loss = 3.2234e-01, PNorm = 40.0519, GNorm = 3.2854, lr_0 = 7.0038e-04
Loss = 2.6771e-01, PNorm = 40.0756, GNorm = 5.1459, lr_0 = 7.0413e-04
Loss = 2.2373e-01, PNorm = 40.0965, GNorm = 2.1792, lr_0 = 7.0788e-04
Loss = 2.5508e-01, PNorm = 40.1193, GNorm = 4.0606, lr_0 = 7.1163e-04
Loss = 2.4089e-01, PNorm = 40.1382, GNorm = 2.6376, lr_0 = 7.1538e-04
Loss = 2.2009e-01, PNorm = 40.1602, GNorm = 4.1085, lr_0 = 7.1913e-04
Loss = 2.7104e-01, PNorm = 40.1855, GNorm = 3.8579, lr_0 = 7.2288e-04
Loss = 2.4004e-01, PNorm = 40.2044, GNorm = 3.9102, lr_0 = 7.2663e-04
Loss = 2.1714e-01, PNorm = 40.2260, GNorm = 1.9036, lr_0 = 7.3038e-04
Loss = 2.0520e-01, PNorm = 40.2432, GNorm = 1.4027, lr_0 = 7.3413e-04
Loss = 2.0347e-01, PNorm = 40.2553, GNorm = 1.3046, lr_0 = 7.3788e-04
Loss = 2.2801e-01, PNorm = 40.2657, GNorm = 5.8030, lr_0 = 7.4163e-04
Loss = 2.6594e-01, PNorm = 40.2784, GNorm = 8.0189, lr_0 = 7.4538e-04
Loss = 2.9918e-01, PNorm = 40.2888, GNorm = 8.3669, lr_0 = 7.4913e-04
Loss = 2.3421e-01, PNorm = 40.3054, GNorm = 1.3278, lr_0 = 7.5288e-04
Loss = 2.4669e-01, PNorm = 40.3274, GNorm = 3.0929, lr_0 = 7.5663e-04
Loss = 2.5841e-01, PNorm = 40.3430, GNorm = 0.9349, lr_0 = 7.6038e-04
Loss = 3.0666e-01, PNorm = 40.3626, GNorm = 4.6432, lr_0 = 7.6413e-04
Loss = 2.4097e-01, PNorm = 40.3875, GNorm = 2.5373, lr_0 = 7.6788e-04
Loss = 2.1736e-01, PNorm = 40.4121, GNorm = 2.9923, lr_0 = 7.7163e-04
Loss = 1.9931e-01, PNorm = 40.4322, GNorm = 3.1502, lr_0 = 7.7538e-04
Loss = 2.5274e-01, PNorm = 40.4438, GNorm = 0.8487, lr_0 = 7.7913e-04
Loss = 2.1813e-01, PNorm = 40.4614, GNorm = 7.0363, lr_0 = 7.8288e-04
Loss = 2.5463e-01, PNorm = 40.4844, GNorm = 1.2397, lr_0 = 7.8663e-04
Loss = 2.4798e-01, PNorm = 40.5139, GNorm = 3.3511, lr_0 = 7.9038e-04
Loss = 2.9909e-01, PNorm = 40.5304, GNorm = 7.9172, lr_0 = 7.9413e-04
Loss = 3.4016e-01, PNorm = 40.5548, GNorm = 3.4623, lr_0 = 7.9788e-04
Loss = 2.7109e-01, PNorm = 40.5848, GNorm = 1.9263, lr_0 = 8.0163e-04
Loss = 2.7116e-01, PNorm = 40.6199, GNorm = 4.2460, lr_0 = 8.0538e-04
Loss = 2.1873e-01, PNorm = 40.6467, GNorm = 1.0715, lr_0 = 8.0913e-04
Loss = 2.4089e-01, PNorm = 40.6612, GNorm = 0.8352, lr_0 = 8.1288e-04
Loss = 2.4213e-01, PNorm = 40.6734, GNorm = 2.7944, lr_0 = 8.1663e-04
Loss = 2.5962e-01, PNorm = 40.6954, GNorm = 1.4038, lr_0 = 8.2038e-04
Loss = 2.1735e-01, PNorm = 40.7202, GNorm = 4.9309, lr_0 = 8.2413e-04
Loss = 2.2699e-01, PNorm = 40.7397, GNorm = 1.8390, lr_0 = 8.2788e-04
Loss = 2.4555e-01, PNorm = 40.7598, GNorm = 5.0573, lr_0 = 8.3163e-04
Loss = 2.3881e-01, PNorm = 40.7809, GNorm = 2.0626, lr_0 = 8.3538e-04
Loss = 2.3881e-01, PNorm = 40.8025, GNorm = 3.1134, lr_0 = 8.3913e-04
Loss = 2.2777e-01, PNorm = 40.8292, GNorm = 4.1258, lr_0 = 8.4288e-04
Loss = 2.5432e-01, PNorm = 40.8555, GNorm = 5.7430, lr_0 = 8.4663e-04
Loss = 2.1934e-01, PNorm = 40.8885, GNorm = 2.0635, lr_0 = 8.5038e-04
Loss = 1.9247e-01, PNorm = 40.9175, GNorm = 1.1979, lr_0 = 8.5413e-04
Loss = 2.2582e-01, PNorm = 40.9316, GNorm = 3.0488, lr_0 = 8.5788e-04
Loss = 2.7570e-01, PNorm = 40.9537, GNorm = 4.4634, lr_0 = 8.6163e-04
Loss = 2.3732e-01, PNorm = 40.9854, GNorm = 3.5669, lr_0 = 8.6538e-04
Loss = 2.2762e-01, PNorm = 41.0141, GNorm = 1.0477, lr_0 = 8.6913e-04
Loss = 2.2522e-01, PNorm = 41.0351, GNorm = 3.0726, lr_0 = 8.7288e-04
Loss = 2.1692e-01, PNorm = 41.0589, GNorm = 2.7115, lr_0 = 8.7663e-04
Loss = 2.4892e-01, PNorm = 41.0840, GNorm = 3.0063, lr_0 = 8.8038e-04
Loss = 2.3177e-01, PNorm = 41.1121, GNorm = 2.5573, lr_0 = 8.8413e-04
Loss = 2.4997e-01, PNorm = 41.1326, GNorm = 5.1138, lr_0 = 8.8788e-04
Loss = 2.3460e-01, PNorm = 41.1514, GNorm = 1.6678, lr_0 = 8.9163e-04
Loss = 2.2795e-01, PNorm = 41.1816, GNorm = 1.1053, lr_0 = 8.9538e-04
Loss = 2.3511e-01, PNorm = 41.2008, GNorm = 4.8140, lr_0 = 8.9913e-04
Loss = 2.3657e-01, PNorm = 41.2220, GNorm = 2.6648, lr_0 = 9.0288e-04
Loss = 2.0637e-01, PNorm = 41.2459, GNorm = 2.2080, lr_0 = 9.0663e-04
Loss = 2.2190e-01, PNorm = 41.2636, GNorm = 0.8572, lr_0 = 9.1038e-04
Loss = 2.7318e-01, PNorm = 41.2869, GNorm = 1.8056, lr_0 = 9.1413e-04
Loss = 2.2432e-01, PNorm = 41.3244, GNorm = 2.0069, lr_0 = 9.1788e-04
Loss = 2.4522e-01, PNorm = 41.3573, GNorm = 1.7954, lr_0 = 9.2163e-04
Loss = 2.0312e-01, PNorm = 41.3938, GNorm = 3.2806, lr_0 = 9.2538e-04
Loss = 2.3714e-01, PNorm = 41.4178, GNorm = 3.5207, lr_0 = 9.2913e-04
Loss = 2.9342e-01, PNorm = 41.4525, GNorm = 5.1863, lr_0 = 9.3288e-04
Loss = 2.4019e-01, PNorm = 41.5006, GNorm = 3.3591, lr_0 = 9.3663e-04
Loss = 2.0350e-01, PNorm = 41.5361, GNorm = 2.1712, lr_0 = 9.4038e-04
Loss = 2.2377e-01, PNorm = 41.5597, GNorm = 3.1867, lr_0 = 9.4413e-04
Loss = 2.3475e-01, PNorm = 41.5811, GNorm = 2.2798, lr_0 = 9.4788e-04
Loss = 2.0924e-01, PNorm = 41.6051, GNorm = 0.9992, lr_0 = 9.5163e-04
Loss = 2.1456e-01, PNorm = 41.6283, GNorm = 0.9553, lr_0 = 9.5538e-04
Loss = 2.1733e-01, PNorm = 41.6604, GNorm = 1.8599, lr_0 = 9.5913e-04
Loss = 2.1515e-01, PNorm = 41.6810, GNorm = 2.4341, lr_0 = 9.6288e-04
Loss = 2.5696e-01, PNorm = 41.7130, GNorm = 0.8907, lr_0 = 9.6663e-04
Loss = 2.5111e-01, PNorm = 41.7372, GNorm = 2.2712, lr_0 = 9.7038e-04
Loss = 2.2636e-01, PNorm = 41.7622, GNorm = 3.5031, lr_0 = 9.7413e-04
Loss = 2.1278e-01, PNorm = 41.7852, GNorm = 1.7861, lr_0 = 9.7788e-04
Loss = 2.0939e-01, PNorm = 41.8152, GNorm = 4.5917, lr_0 = 9.8163e-04
Loss = 2.5214e-01, PNorm = 41.8353, GNorm = 0.7881, lr_0 = 9.8537e-04
Loss = 2.5084e-01, PNorm = 41.8620, GNorm = 2.5610, lr_0 = 9.8912e-04
Loss = 2.4645e-01, PNorm = 41.8997, GNorm = 1.3204, lr_0 = 9.9288e-04
Loss = 2.3559e-01, PNorm = 41.9370, GNorm = 2.3602, lr_0 = 9.9663e-04
Loss = 1.7277e-01, PNorm = 41.9664, GNorm = 0.8341, lr_0 = 9.9993e-04
Validation mae = 0.504641
Epoch 2
Loss = 1.9426e-01, PNorm = 41.9901, GNorm = 2.2693, lr_0 = 9.9925e-04
Loss = 2.0548e-01, PNorm = 42.0175, GNorm = 1.3175, lr_0 = 9.9856e-04
Loss = 2.0328e-01, PNorm = 42.0419, GNorm = 1.8239, lr_0 = 9.9788e-04
Loss = 2.1439e-01, PNorm = 42.0702, GNorm = 0.9058, lr_0 = 9.9719e-04
Loss = 2.0785e-01, PNorm = 42.0936, GNorm = 3.7253, lr_0 = 9.9651e-04
Loss = 2.0926e-01, PNorm = 42.1085, GNorm = 1.0764, lr_0 = 9.9583e-04
Loss = 2.1883e-01, PNorm = 42.1319, GNorm = 2.0630, lr_0 = 9.9515e-04
Loss = 1.9091e-01, PNorm = 42.1561, GNorm = 1.7635, lr_0 = 9.9446e-04
Loss = 2.0317e-01, PNorm = 42.1793, GNorm = 1.0002, lr_0 = 9.9378e-04
Loss = 2.0725e-01, PNorm = 42.2085, GNorm = 2.3362, lr_0 = 9.9310e-04
Loss = 2.3275e-01, PNorm = 42.2348, GNorm = 1.5213, lr_0 = 9.9242e-04
Loss = 2.0622e-01, PNorm = 42.2594, GNorm = 1.5778, lr_0 = 9.9174e-04
Loss = 1.9655e-01, PNorm = 42.2852, GNorm = 2.3946, lr_0 = 9.9106e-04
Loss = 2.2880e-01, PNorm = 42.3138, GNorm = 2.0611, lr_0 = 9.9038e-04
Loss = 2.0484e-01, PNorm = 42.3466, GNorm = 2.2142, lr_0 = 9.8971e-04
Loss = 1.9634e-01, PNorm = 42.3691, GNorm = 0.8926, lr_0 = 9.8903e-04
Loss = 1.9341e-01, PNorm = 42.3936, GNorm = 0.9973, lr_0 = 9.8835e-04
Loss = 1.9474e-01, PNorm = 42.4220, GNorm = 2.4025, lr_0 = 9.8767e-04
Loss = 1.7211e-01, PNorm = 42.4452, GNorm = 1.5454, lr_0 = 9.8700e-04
Loss = 2.0865e-01, PNorm = 42.4734, GNorm = 2.7786, lr_0 = 9.8632e-04
Loss = 1.9148e-01, PNorm = 42.4935, GNorm = 0.7705, lr_0 = 9.8564e-04
Loss = 1.8577e-01, PNorm = 42.5196, GNorm = 2.2349, lr_0 = 9.8497e-04
Loss = 2.1299e-01, PNorm = 42.5380, GNorm = 2.9827, lr_0 = 9.8429e-04
Loss = 2.0245e-01, PNorm = 42.5685, GNorm = 3.7083, lr_0 = 9.8362e-04
Loss = 2.1236e-01, PNorm = 42.5968, GNorm = 3.3070, lr_0 = 9.8295e-04
Loss = 1.7329e-01, PNorm = 42.6177, GNorm = 1.5013, lr_0 = 9.8227e-04
Loss = 1.9505e-01, PNorm = 42.6443, GNorm = 2.0975, lr_0 = 9.8160e-04
Loss = 1.9683e-01, PNorm = 42.6710, GNorm = 3.5855, lr_0 = 9.8093e-04
Loss = 2.3864e-01, PNorm = 42.6920, GNorm = 1.7163, lr_0 = 9.8026e-04
Loss = 2.5183e-01, PNorm = 42.7123, GNorm = 2.5017, lr_0 = 9.7958e-04
Loss = 2.3798e-01, PNorm = 42.7337, GNorm = 4.6448, lr_0 = 9.7891e-04
Loss = 3.0906e-01, PNorm = 42.7680, GNorm = 3.7799, lr_0 = 9.7824e-04
Loss = 1.9943e-01, PNorm = 42.8046, GNorm = 1.4183, lr_0 = 9.7757e-04
Loss = 2.1830e-01, PNorm = 42.8425, GNorm = 2.2902, lr_0 = 9.7690e-04
Loss = 2.0323e-01, PNorm = 42.8692, GNorm = 1.2682, lr_0 = 9.7623e-04
Loss = 2.0114e-01, PNorm = 42.8872, GNorm = 6.1403, lr_0 = 9.7556e-04
Loss = 1.9356e-01, PNorm = 42.9048, GNorm = 2.4683, lr_0 = 9.7490e-04
Loss = 2.2374e-01, PNorm = 42.9419, GNorm = 1.1672, lr_0 = 9.7423e-04
Loss = 2.0640e-01, PNorm = 42.9819, GNorm = 1.2171, lr_0 = 9.7356e-04
Loss = 2.1427e-01, PNorm = 43.0145, GNorm = 0.9308, lr_0 = 9.7289e-04
Loss = 1.9660e-01, PNorm = 43.0318, GNorm = 1.0316, lr_0 = 9.7223e-04
Loss = 1.7086e-01, PNorm = 43.0575, GNorm = 1.8964, lr_0 = 9.7156e-04
Loss = 1.8290e-01, PNorm = 43.0781, GNorm = 3.8909, lr_0 = 9.7090e-04
Loss = 1.9966e-01, PNorm = 43.1000, GNorm = 4.9332, lr_0 = 9.7023e-04
Loss = 2.1629e-01, PNorm = 43.1263, GNorm = 3.6336, lr_0 = 9.6957e-04
Loss = 1.9429e-01, PNorm = 43.1500, GNorm = 1.6779, lr_0 = 9.6890e-04
Loss = 1.9775e-01, PNorm = 43.1821, GNorm = 2.4916, lr_0 = 9.6824e-04
Loss = 1.7444e-01, PNorm = 43.2116, GNorm = 3.6017, lr_0 = 9.6757e-04
Loss = 1.8277e-01, PNorm = 43.2426, GNorm = 2.4814, lr_0 = 9.6691e-04
Loss = 1.9461e-01, PNorm = 43.2651, GNorm = 1.6342, lr_0 = 9.6625e-04
Loss = 1.5725e-01, PNorm = 43.2793, GNorm = 2.2690, lr_0 = 9.6559e-04
Loss = 1.9154e-01, PNorm = 43.2936, GNorm = 3.8270, lr_0 = 9.6493e-04
Loss = 2.3636e-01, PNorm = 43.3115, GNorm = 4.8454, lr_0 = 9.6427e-04
Loss = 2.0750e-01, PNorm = 43.3335, GNorm = 1.2487, lr_0 = 9.6360e-04
Loss = 2.1396e-01, PNorm = 43.3647, GNorm = 2.7919, lr_0 = 9.6294e-04
Loss = 1.7172e-01, PNorm = 43.3968, GNorm = 3.0525, lr_0 = 9.6228e-04
Loss = 1.8267e-01, PNorm = 43.4192, GNorm = 2.6912, lr_0 = 9.6163e-04
Loss = 1.7835e-01, PNorm = 43.4466, GNorm = 1.7251, lr_0 = 9.6097e-04
Loss = 1.9343e-01, PNorm = 43.4746, GNorm = 0.9601, lr_0 = 9.6031e-04
Loss = 2.0180e-01, PNorm = 43.5057, GNorm = 1.7735, lr_0 = 9.5965e-04
Loss = 1.8740e-01, PNorm = 43.5264, GNorm = 1.8211, lr_0 = 9.5899e-04
Loss = 2.2106e-01, PNorm = 43.5474, GNorm = 4.7820, lr_0 = 9.5834e-04
Loss = 1.6370e-01, PNorm = 43.5718, GNorm = 1.1013, lr_0 = 9.5768e-04
Loss = 1.8637e-01, PNorm = 43.5937, GNorm = 2.3093, lr_0 = 9.5702e-04
Loss = 1.9056e-01, PNorm = 43.6115, GNorm = 0.8622, lr_0 = 9.5637e-04
Loss = 1.5772e-01, PNorm = 43.6308, GNorm = 1.0240, lr_0 = 9.5571e-04
Loss = 1.7688e-01, PNorm = 43.6555, GNorm = 1.7925, lr_0 = 9.5506e-04
Loss = 1.8557e-01, PNorm = 43.6717, GNorm = 2.8216, lr_0 = 9.5440e-04
Loss = 2.1301e-01, PNorm = 43.6975, GNorm = 1.1081, lr_0 = 9.5375e-04
Loss = 1.9901e-01, PNorm = 43.7210, GNorm = 4.8451, lr_0 = 9.5310e-04
Loss = 1.9893e-01, PNorm = 43.7429, GNorm = 3.1310, lr_0 = 9.5244e-04
Loss = 2.2333e-01, PNorm = 43.7662, GNorm = 0.7277, lr_0 = 9.5179e-04
Loss = 2.1207e-01, PNorm = 43.7960, GNorm = 2.4678, lr_0 = 9.5114e-04
Loss = 1.9426e-01, PNorm = 43.8175, GNorm = 1.9801, lr_0 = 9.5049e-04
Loss = 1.8491e-01, PNorm = 43.8411, GNorm = 3.9618, lr_0 = 9.4984e-04
Loss = 1.9208e-01, PNorm = 43.8620, GNorm = 1.6429, lr_0 = 9.4919e-04
Loss = 1.9996e-01, PNorm = 43.8899, GNorm = 2.6683, lr_0 = 9.4854e-04
Loss = 1.8895e-01, PNorm = 43.9130, GNorm = 3.9841, lr_0 = 9.4789e-04
Loss = 1.6392e-01, PNorm = 43.9351, GNorm = 1.0634, lr_0 = 9.4724e-04
Loss = 1.7146e-01, PNorm = 43.9578, GNorm = 3.4653, lr_0 = 9.4659e-04
Loss = 2.6336e-01, PNorm = 43.9802, GNorm = 6.2755, lr_0 = 9.4594e-04
Loss = 2.3351e-01, PNorm = 44.0116, GNorm = 0.7554, lr_0 = 9.4529e-04
Loss = 2.1496e-01, PNorm = 44.0558, GNorm = 1.5226, lr_0 = 9.4464e-04
Loss = 1.8574e-01, PNorm = 44.0970, GNorm = 1.0081, lr_0 = 9.4400e-04
Loss = 1.9830e-01, PNorm = 44.1282, GNorm = 5.2673, lr_0 = 9.4335e-04
Loss = 2.0401e-01, PNorm = 44.1517, GNorm = 1.0055, lr_0 = 9.4270e-04
Loss = 2.1373e-01, PNorm = 44.1797, GNorm = 1.5630, lr_0 = 9.4206e-04
Loss = 2.0197e-01, PNorm = 44.2130, GNorm = 1.3815, lr_0 = 9.4141e-04
Loss = 1.9604e-01, PNorm = 44.2434, GNorm = 2.1539, lr_0 = 9.4077e-04
Loss = 1.6573e-01, PNorm = 44.2730, GNorm = 0.8810, lr_0 = 9.4012e-04
Loss = 1.8341e-01, PNorm = 44.2858, GNorm = 1.7180, lr_0 = 9.3948e-04
Loss = 1.9316e-01, PNorm = 44.2942, GNorm = 2.4240, lr_0 = 9.3884e-04
Loss = 1.8977e-01, PNorm = 44.3124, GNorm = 1.2709, lr_0 = 9.3819e-04
Loss = 1.7636e-01, PNorm = 44.3396, GNorm = 0.7807, lr_0 = 9.3755e-04
Loss = 1.9742e-01, PNorm = 44.3576, GNorm = 4.6827, lr_0 = 9.3691e-04
Loss = 2.0263e-01, PNorm = 44.3738, GNorm = 2.3540, lr_0 = 9.3627e-04
Loss = 1.9808e-01, PNorm = 44.3991, GNorm = 3.7204, lr_0 = 9.3562e-04
Loss = 1.8745e-01, PNorm = 44.4242, GNorm = 1.2025, lr_0 = 9.3498e-04
Loss = 1.7698e-01, PNorm = 44.4461, GNorm = 1.9321, lr_0 = 9.3434e-04
Loss = 1.5718e-01, PNorm = 44.4702, GNorm = 1.8297, lr_0 = 9.3370e-04
Loss = 1.9186e-01, PNorm = 44.4955, GNorm = 1.4836, lr_0 = 9.3306e-04
Loss = 1.7345e-01, PNorm = 44.5193, GNorm = 0.9098, lr_0 = 9.3242e-04
Loss = 1.9937e-01, PNorm = 44.5430, GNorm = 2.7018, lr_0 = 9.3178e-04
Loss = 1.9794e-01, PNorm = 44.5744, GNorm = 2.1570, lr_0 = 9.3115e-04
Loss = 1.9002e-01, PNorm = 44.6008, GNorm = 4.0596, lr_0 = 9.3051e-04
Loss = 2.0751e-01, PNorm = 44.6353, GNorm = 2.1723, lr_0 = 9.2987e-04
Loss = 2.4122e-01, PNorm = 44.6628, GNorm = 2.0646, lr_0 = 9.2923e-04
Loss = 1.8795e-01, PNorm = 44.6914, GNorm = 0.9616, lr_0 = 9.2860e-04
Loss = 1.9222e-01, PNorm = 44.7194, GNorm = 0.7964, lr_0 = 9.2796e-04
Loss = 1.5521e-01, PNorm = 44.7521, GNorm = 0.7321, lr_0 = 9.2733e-04
Loss = 1.8964e-01, PNorm = 44.7732, GNorm = 3.1066, lr_0 = 9.2669e-04
Loss = 2.0105e-01, PNorm = 44.7957, GNorm = 3.8587, lr_0 = 9.2606e-04
Loss = 2.0747e-01, PNorm = 44.8178, GNorm = 1.0623, lr_0 = 9.2542e-04
Loss = 1.9539e-01, PNorm = 44.8457, GNorm = 1.9635, lr_0 = 9.2479e-04
Loss = 1.9689e-01, PNorm = 44.8725, GNorm = 0.9213, lr_0 = 9.2415e-04
Loss = 1.8982e-01, PNorm = 44.8969, GNorm = 1.5537, lr_0 = 9.2352e-04
Loss = 1.6227e-01, PNorm = 44.9255, GNorm = 1.5995, lr_0 = 9.2289e-04
Loss = 1.7915e-01, PNorm = 44.9569, GNorm = 0.8329, lr_0 = 9.2226e-04
Loss = 1.6355e-01, PNorm = 44.9832, GNorm = 2.2772, lr_0 = 9.2162e-04
Loss = 1.7783e-01, PNorm = 45.0039, GNorm = 1.4889, lr_0 = 9.2099e-04
Validation mae = 0.487550
Epoch 3
Loss = 1.9086e-01, PNorm = 45.0308, GNorm = 0.9294, lr_0 = 9.2036e-04
Loss = 2.0185e-01, PNorm = 45.0537, GNorm = 3.1263, lr_0 = 9.1973e-04
Loss = 1.6376e-01, PNorm = 45.0842, GNorm = 0.7358, lr_0 = 9.1910e-04
Loss = 1.5930e-01, PNorm = 45.1110, GNorm = 2.9049, lr_0 = 9.1847e-04
Loss = 1.7788e-01, PNorm = 45.1323, GNorm = 1.2720, lr_0 = 9.1784e-04
Loss = 1.7418e-01, PNorm = 45.1524, GNorm = 3.1393, lr_0 = 9.1721e-04
Loss = 1.5762e-01, PNorm = 45.1752, GNorm = 2.1163, lr_0 = 9.1658e-04
Loss = 1.5650e-01, PNorm = 45.2031, GNorm = 1.3394, lr_0 = 9.1596e-04
Loss = 1.6084e-01, PNorm = 45.2265, GNorm = 1.3062, lr_0 = 9.1533e-04
Loss = 1.6011e-01, PNorm = 45.2496, GNorm = 2.0995, lr_0 = 9.1470e-04
Loss = 1.6561e-01, PNorm = 45.2711, GNorm = 0.9243, lr_0 = 9.1408e-04
Loss = 1.5748e-01, PNorm = 45.2895, GNorm = 0.7744, lr_0 = 9.1345e-04
Loss = 1.8446e-01, PNorm = 45.3048, GNorm = 0.7765, lr_0 = 9.1282e-04
Loss = 1.4976e-01, PNorm = 45.3236, GNorm = 1.5734, lr_0 = 9.1220e-04
Loss = 1.4186e-01, PNorm = 45.3355, GNorm = 1.6585, lr_0 = 9.1157e-04
Loss = 1.6616e-01, PNorm = 45.3498, GNorm = 3.1465, lr_0 = 9.1095e-04
Loss = 1.6270e-01, PNorm = 45.3652, GNorm = 0.9838, lr_0 = 9.1032e-04
Loss = 1.6398e-01, PNorm = 45.3916, GNorm = 1.1341, lr_0 = 9.0970e-04
Loss = 1.5854e-01, PNorm = 45.4120, GNorm = 2.0330, lr_0 = 9.0908e-04
Loss = 1.7816e-01, PNorm = 45.4293, GNorm = 1.3270, lr_0 = 9.0846e-04
Loss = 1.7888e-01, PNorm = 45.4561, GNorm = 1.5312, lr_0 = 9.0783e-04
Loss = 1.8388e-01, PNorm = 45.4817, GNorm = 2.1341, lr_0 = 9.0721e-04
Loss = 2.1095e-01, PNorm = 45.5028, GNorm = 1.2423, lr_0 = 9.0659e-04
Loss = 1.7649e-01, PNorm = 45.5314, GNorm = 1.6345, lr_0 = 9.0597e-04
Loss = 1.6407e-01, PNorm = 45.5552, GNorm = 1.7299, lr_0 = 9.0535e-04
Loss = 1.8894e-01, PNorm = 45.5654, GNorm = 4.9204, lr_0 = 9.0473e-04
Loss = 1.7532e-01, PNorm = 45.5930, GNorm = 1.3128, lr_0 = 9.0411e-04
Loss = 1.6197e-01, PNorm = 45.6212, GNorm = 0.9014, lr_0 = 9.0349e-04
Loss = 1.6749e-01, PNorm = 45.6459, GNorm = 2.3532, lr_0 = 9.0287e-04
Loss = 1.5640e-01, PNorm = 45.6680, GNorm = 1.2434, lr_0 = 9.0225e-04
Loss = 1.8362e-01, PNorm = 45.6869, GNorm = 1.6837, lr_0 = 9.0163e-04
Loss = 1.5053e-01, PNorm = 45.7091, GNorm = 0.6427, lr_0 = 9.0102e-04
Loss = 1.9703e-01, PNorm = 45.7246, GNorm = 2.2916, lr_0 = 9.0040e-04
Loss = 1.7324e-01, PNorm = 45.7441, GNorm = 1.9961, lr_0 = 8.9978e-04
Loss = 1.4905e-01, PNorm = 45.7700, GNorm = 1.0103, lr_0 = 8.9916e-04
Loss = 1.7342e-01, PNorm = 45.7982, GNorm = 1.6711, lr_0 = 8.9855e-04
Loss = 1.6948e-01, PNorm = 45.8261, GNorm = 1.4809, lr_0 = 8.9793e-04
Loss = 1.7576e-01, PNorm = 45.8377, GNorm = 2.3176, lr_0 = 8.9732e-04
Loss = 1.7170e-01, PNorm = 45.8531, GNorm = 1.3336, lr_0 = 8.9670e-04
Loss = 1.5200e-01, PNorm = 45.8676, GNorm = 1.1892, lr_0 = 8.9609e-04
Loss = 1.5927e-01, PNorm = 45.8858, GNorm = 0.5802, lr_0 = 8.9548e-04
Loss = 1.7661e-01, PNorm = 45.9075, GNorm = 2.1437, lr_0 = 8.9486e-04
Loss = 1.9736e-01, PNorm = 45.9368, GNorm = 3.2907, lr_0 = 8.9425e-04
Loss = 1.8146e-01, PNorm = 45.9797, GNorm = 1.8057, lr_0 = 8.9364e-04
Loss = 1.5991e-01, PNorm = 46.0075, GNorm = 1.7086, lr_0 = 8.9302e-04
Loss = 1.7994e-01, PNorm = 46.0380, GNorm = 2.3638, lr_0 = 8.9241e-04
Loss = 1.9859e-01, PNorm = 46.0649, GNorm = 0.6368, lr_0 = 8.9180e-04
Loss = 1.7837e-01, PNorm = 46.0913, GNorm = 1.2777, lr_0 = 8.9119e-04
Loss = 1.7515e-01, PNorm = 46.1266, GNorm = 1.9338, lr_0 = 8.9058e-04
Loss = 2.0180e-01, PNorm = 46.1522, GNorm = 3.1319, lr_0 = 8.8997e-04
Loss = 1.8219e-01, PNorm = 46.1801, GNorm = 0.7162, lr_0 = 8.8936e-04
Loss = 1.6154e-01, PNorm = 46.2048, GNorm = 0.9430, lr_0 = 8.8875e-04
Loss = 1.6024e-01, PNorm = 46.2201, GNorm = 0.9304, lr_0 = 8.8814e-04
Loss = 1.8219e-01, PNorm = 46.2406, GNorm = 1.5254, lr_0 = 8.8753e-04
Loss = 1.7587e-01, PNorm = 46.2644, GNorm = 4.4054, lr_0 = 8.8693e-04
Loss = 2.0152e-01, PNorm = 46.2951, GNorm = 1.1955, lr_0 = 8.8632e-04
Loss = 1.8205e-01, PNorm = 46.3275, GNorm = 1.3408, lr_0 = 8.8571e-04
Loss = 1.7680e-01, PNorm = 46.3481, GNorm = 1.1411, lr_0 = 8.8510e-04
Loss = 1.5155e-01, PNorm = 46.3679, GNorm = 1.4827, lr_0 = 8.8450e-04
Loss = 1.6708e-01, PNorm = 46.3869, GNorm = 1.2297, lr_0 = 8.8389e-04
Loss = 1.5265e-01, PNorm = 46.4118, GNorm = 2.2756, lr_0 = 8.8329e-04
Loss = 1.7320e-01, PNorm = 46.4404, GNorm = 1.9895, lr_0 = 8.8268e-04
Loss = 1.8274e-01, PNorm = 46.4723, GNorm = 1.1728, lr_0 = 8.8208e-04
Loss = 1.3926e-01, PNorm = 46.4979, GNorm = 1.2422, lr_0 = 8.8147e-04
Loss = 1.6321e-01, PNorm = 46.5102, GNorm = 0.7458, lr_0 = 8.8087e-04
Loss = 1.8975e-01, PNorm = 46.5280, GNorm = 3.1543, lr_0 = 8.8026e-04
Loss = 1.7324e-01, PNorm = 46.5524, GNorm = 3.1274, lr_0 = 8.7966e-04
Loss = 1.6653e-01, PNorm = 46.5738, GNorm = 0.9601, lr_0 = 8.7906e-04
Loss = 1.5965e-01, PNorm = 46.5890, GNorm = 1.0888, lr_0 = 8.7846e-04
Loss = 1.5480e-01, PNorm = 46.6025, GNorm = 1.2051, lr_0 = 8.7785e-04
Loss = 1.7589e-01, PNorm = 46.6215, GNorm = 0.7722, lr_0 = 8.7725e-04
Loss = 1.6869e-01, PNorm = 46.6395, GNorm = 2.4918, lr_0 = 8.7665e-04
Loss = 1.6280e-01, PNorm = 46.6558, GNorm = 1.9957, lr_0 = 8.7605e-04
Loss = 1.6323e-01, PNorm = 46.6789, GNorm = 1.1686, lr_0 = 8.7545e-04
Loss = 1.6865e-01, PNorm = 46.7030, GNorm = 0.8285, lr_0 = 8.7485e-04
Loss = 1.8363e-01, PNorm = 46.7147, GNorm = 0.8069, lr_0 = 8.7425e-04
Loss = 1.7662e-01, PNorm = 46.7342, GNorm = 1.3883, lr_0 = 8.7365e-04
Loss = 1.8175e-01, PNorm = 46.7593, GNorm = 1.2324, lr_0 = 8.7306e-04
Loss = 1.8839e-01, PNorm = 46.7907, GNorm = 2.4822, lr_0 = 8.7246e-04
Loss = 1.5596e-01, PNorm = 46.8187, GNorm = 0.8838, lr_0 = 8.7186e-04
Loss = 1.5174e-01, PNorm = 46.8415, GNorm = 1.5142, lr_0 = 8.7126e-04
Loss = 1.5745e-01, PNorm = 46.8594, GNorm = 0.8856, lr_0 = 8.7067e-04
Loss = 1.8708e-01, PNorm = 46.8879, GNorm = 3.1804, lr_0 = 8.7007e-04
Loss = 1.9907e-01, PNorm = 46.9133, GNorm = 1.3416, lr_0 = 8.6947e-04
Loss = 1.8275e-01, PNorm = 46.9382, GNorm = 2.3660, lr_0 = 8.6888e-04
Loss = 1.9859e-01, PNorm = 46.9758, GNorm = 1.4016, lr_0 = 8.6828e-04
Loss = 1.8023e-01, PNorm = 46.9920, GNorm = 1.2245, lr_0 = 8.6769e-04
Loss = 1.5639e-01, PNorm = 47.0064, GNorm = 1.0656, lr_0 = 8.6709e-04
Loss = 1.6803e-01, PNorm = 47.0249, GNorm = 0.7637, lr_0 = 8.6650e-04
Loss = 1.6210e-01, PNorm = 47.0514, GNorm = 0.7665, lr_0 = 8.6590e-04
Loss = 1.6658e-01, PNorm = 47.0701, GNorm = 2.3573, lr_0 = 8.6531e-04
Loss = 1.5894e-01, PNorm = 47.0918, GNorm = 1.6505, lr_0 = 8.6472e-04
Loss = 1.4516e-01, PNorm = 47.1143, GNorm = 0.9402, lr_0 = 8.6413e-04
Loss = 1.5440e-01, PNorm = 47.1313, GNorm = 1.7571, lr_0 = 8.6353e-04
Loss = 1.7142e-01, PNorm = 47.1463, GNorm = 1.7393, lr_0 = 8.6294e-04
Loss = 1.8863e-01, PNorm = 47.1735, GNorm = 2.9849, lr_0 = 8.6235e-04
Loss = 1.9332e-01, PNorm = 47.2087, GNorm = 2.1260, lr_0 = 8.6176e-04
Loss = 1.4673e-01, PNorm = 47.2278, GNorm = 2.4368, lr_0 = 8.6117e-04
Loss = 1.6881e-01, PNorm = 47.2452, GNorm = 2.0095, lr_0 = 8.6058e-04
Loss = 1.8492e-01, PNorm = 47.2605, GNorm = 2.2979, lr_0 = 8.5999e-04
Loss = 1.7067e-01, PNorm = 47.2773, GNorm = 1.2130, lr_0 = 8.5940e-04
Loss = 2.0312e-01, PNorm = 47.3023, GNorm = 4.2718, lr_0 = 8.5881e-04
Loss = 1.6329e-01, PNorm = 47.3320, GNorm = 2.7871, lr_0 = 8.5823e-04
Loss = 1.6568e-01, PNorm = 47.3528, GNorm = 2.4923, lr_0 = 8.5764e-04
Loss = 1.4398e-01, PNorm = 47.3763, GNorm = 0.8148, lr_0 = 8.5705e-04
Loss = 1.5137e-01, PNorm = 47.3985, GNorm = 1.0854, lr_0 = 8.5646e-04
Loss = 1.3729e-01, PNorm = 47.4177, GNorm = 0.7976, lr_0 = 8.5588e-04
Loss = 1.7873e-01, PNorm = 47.4354, GNorm = 1.3609, lr_0 = 8.5529e-04
Loss = 1.7646e-01, PNorm = 47.4585, GNorm = 0.8893, lr_0 = 8.5470e-04
Loss = 1.4541e-01, PNorm = 47.4841, GNorm = 1.2534, lr_0 = 8.5412e-04
Loss = 1.3687e-01, PNorm = 47.5014, GNorm = 1.2128, lr_0 = 8.5353e-04
Loss = 1.4651e-01, PNorm = 47.5199, GNorm = 0.6660, lr_0 = 8.5295e-04
Loss = 1.4806e-01, PNorm = 47.5369, GNorm = 1.0266, lr_0 = 8.5236e-04
Loss = 1.8028e-01, PNorm = 47.5548, GNorm = 0.8378, lr_0 = 8.5178e-04
Loss = 1.7125e-01, PNorm = 47.5739, GNorm = 1.9806, lr_0 = 8.5120e-04
Loss = 1.4415e-01, PNorm = 47.5935, GNorm = 2.0249, lr_0 = 8.5061e-04
Loss = 1.5792e-01, PNorm = 47.6151, GNorm = 1.1939, lr_0 = 8.5003e-04
Loss = 1.5303e-01, PNorm = 47.6412, GNorm = 1.5275, lr_0 = 8.4945e-04
Loss = 1.4879e-01, PNorm = 47.6619, GNorm = 1.1163, lr_0 = 8.4887e-04
Loss = 1.3230e-01, PNorm = 47.6727, GNorm = 1.0527, lr_0 = 8.4828e-04
Validation mae = 0.458463
Epoch 4
Loss = 1.4084e-01, PNorm = 47.6858, GNorm = 1.1519, lr_0 = 8.4770e-04
Loss = 1.6357e-01, PNorm = 47.7062, GNorm = 1.3424, lr_0 = 8.4712e-04
Loss = 1.4815e-01, PNorm = 47.7208, GNorm = 0.7829, lr_0 = 8.4654e-04
Loss = 1.3238e-01, PNorm = 47.7387, GNorm = 2.2994, lr_0 = 8.4596e-04
Loss = 1.5480e-01, PNorm = 47.7588, GNorm = 1.6089, lr_0 = 8.4538e-04
Loss = 1.8120e-01, PNorm = 47.7858, GNorm = 3.3251, lr_0 = 8.4480e-04
Loss = 1.7594e-01, PNorm = 47.8140, GNorm = 1.2793, lr_0 = 8.4423e-04
Loss = 1.4737e-01, PNorm = 47.8366, GNorm = 1.7499, lr_0 = 8.4365e-04
Loss = 1.6312e-01, PNorm = 47.8625, GNorm = 1.6771, lr_0 = 8.4307e-04
Loss = 1.5488e-01, PNorm = 47.8854, GNorm = 1.0918, lr_0 = 8.4249e-04
Loss = 1.6352e-01, PNorm = 47.9064, GNorm = 0.7797, lr_0 = 8.4191e-04
Loss = 1.7206e-01, PNorm = 47.9197, GNorm = 0.9498, lr_0 = 8.4134e-04
Loss = 1.5950e-01, PNorm = 47.9377, GNorm = 1.3906, lr_0 = 8.4076e-04
Loss = 1.5137e-01, PNorm = 47.9567, GNorm = 0.8215, lr_0 = 8.4019e-04
Loss = 1.4811e-01, PNorm = 47.9782, GNorm = 0.9529, lr_0 = 8.3961e-04
Loss = 1.6135e-01, PNorm = 48.0032, GNorm = 0.8180, lr_0 = 8.3903e-04
Loss = 1.2697e-01, PNorm = 48.0295, GNorm = 1.6816, lr_0 = 8.3846e-04
Loss = 1.4054e-01, PNorm = 48.0486, GNorm = 1.4262, lr_0 = 8.3789e-04
Loss = 1.5624e-01, PNorm = 48.0663, GNorm = 1.5526, lr_0 = 8.3731e-04
Loss = 1.5684e-01, PNorm = 48.0897, GNorm = 1.2353, lr_0 = 8.3674e-04
Loss = 1.4767e-01, PNorm = 48.1126, GNorm = 1.6801, lr_0 = 8.3616e-04
Loss = 1.7313e-01, PNorm = 48.1327, GNorm = 2.7986, lr_0 = 8.3559e-04
Loss = 2.0558e-01, PNorm = 48.1620, GNorm = 1.3561, lr_0 = 8.3502e-04
Loss = 1.6185e-01, PNorm = 48.1910, GNorm = 1.2190, lr_0 = 8.3445e-04
Loss = 1.4448e-01, PNorm = 48.2136, GNorm = 1.7487, lr_0 = 8.3388e-04
Loss = 1.4976e-01, PNorm = 48.2300, GNorm = 0.8178, lr_0 = 8.3330e-04
Loss = 1.4975e-01, PNorm = 48.2500, GNorm = 0.7511, lr_0 = 8.3273e-04
Loss = 1.4991e-01, PNorm = 48.2686, GNorm = 0.8856, lr_0 = 8.3216e-04
Loss = 1.5484e-01, PNorm = 48.2847, GNorm = 1.6589, lr_0 = 8.3159e-04
Loss = 1.5147e-01, PNorm = 48.3049, GNorm = 1.2384, lr_0 = 8.3102e-04
Loss = 1.6698e-01, PNorm = 48.3238, GNorm = 2.3129, lr_0 = 8.3045e-04
Loss = 1.5650e-01, PNorm = 48.3471, GNorm = 1.5907, lr_0 = 8.2988e-04
Loss = 1.4308e-01, PNorm = 48.3748, GNorm = 0.8077, lr_0 = 8.2932e-04
Loss = 1.3514e-01, PNorm = 48.3972, GNorm = 0.6976, lr_0 = 8.2875e-04
Loss = 1.4365e-01, PNorm = 48.4210, GNorm = 0.6241, lr_0 = 8.2818e-04
Loss = 1.3697e-01, PNorm = 48.4347, GNorm = 0.9497, lr_0 = 8.2761e-04
Loss = 1.6559e-01, PNorm = 48.4531, GNorm = 1.7700, lr_0 = 8.2705e-04
Loss = 1.5337e-01, PNorm = 48.4722, GNorm = 1.2780, lr_0 = 8.2648e-04
Loss = 1.5506e-01, PNorm = 48.4951, GNorm = 0.7750, lr_0 = 8.2591e-04
Loss = 1.5954e-01, PNorm = 48.5180, GNorm = 2.1927, lr_0 = 8.2535e-04
Loss = 1.4499e-01, PNorm = 48.5421, GNorm = 1.0124, lr_0 = 8.2478e-04
Loss = 1.4102e-01, PNorm = 48.5577, GNorm = 0.5241, lr_0 = 8.2422e-04
Loss = 1.4660e-01, PNorm = 48.5748, GNorm = 1.5998, lr_0 = 8.2365e-04
Loss = 1.5956e-01, PNorm = 48.5961, GNorm = 0.7143, lr_0 = 8.2309e-04
Loss = 1.6324e-01, PNorm = 48.6232, GNorm = 3.1411, lr_0 = 8.2252e-04
Loss = 1.6593e-01, PNorm = 48.6454, GNorm = 1.0691, lr_0 = 8.2196e-04
Loss = 1.5347e-01, PNorm = 48.6697, GNorm = 0.9455, lr_0 = 8.2140e-04
Loss = 1.7441e-01, PNorm = 48.7019, GNorm = 3.0417, lr_0 = 8.2084e-04
Loss = 1.6219e-01, PNorm = 48.7364, GNorm = 1.1126, lr_0 = 8.2027e-04
Loss = 1.2550e-01, PNorm = 48.7661, GNorm = 1.6827, lr_0 = 8.1971e-04
Loss = 1.2006e-01, PNorm = 48.7798, GNorm = 1.1294, lr_0 = 8.1915e-04
Loss = 1.1624e-01, PNorm = 48.7924, GNorm = 1.1435, lr_0 = 8.1859e-04
Loss = 1.4084e-01, PNorm = 48.8054, GNorm = 1.7028, lr_0 = 8.1803e-04
Loss = 1.3401e-01, PNorm = 48.8158, GNorm = 2.5780, lr_0 = 8.1747e-04
Loss = 1.3915e-01, PNorm = 48.8330, GNorm = 1.0546, lr_0 = 8.1691e-04
Loss = 1.4887e-01, PNorm = 48.8446, GNorm = 0.9445, lr_0 = 8.1635e-04
Loss = 1.5531e-01, PNorm = 48.8626, GNorm = 1.4461, lr_0 = 8.1579e-04
Loss = 1.5442e-01, PNorm = 48.8820, GNorm = 1.4974, lr_0 = 8.1523e-04
Loss = 1.3689e-01, PNorm = 48.9047, GNorm = 0.6647, lr_0 = 8.1467e-04
Loss = 1.4262e-01, PNorm = 48.9263, GNorm = 0.7723, lr_0 = 8.1411e-04
Loss = 1.5699e-01, PNorm = 48.9491, GNorm = 1.7020, lr_0 = 8.1355e-04
Loss = 1.6280e-01, PNorm = 48.9683, GNorm = 1.3170, lr_0 = 8.1300e-04
Loss = 1.5404e-01, PNorm = 48.9873, GNorm = 1.5242, lr_0 = 8.1244e-04
Loss = 1.5627e-01, PNorm = 49.0095, GNorm = 1.9147, lr_0 = 8.1188e-04
Loss = 1.5105e-01, PNorm = 49.0315, GNorm = 1.5028, lr_0 = 8.1133e-04
Loss = 1.6357e-01, PNorm = 49.0541, GNorm = 2.6837, lr_0 = 8.1077e-04
Loss = 1.5532e-01, PNorm = 49.0810, GNorm = 0.6235, lr_0 = 8.1022e-04
Loss = 1.5043e-01, PNorm = 49.1049, GNorm = 0.7487, lr_0 = 8.0966e-04
Loss = 1.3861e-01, PNorm = 49.1270, GNorm = 0.4923, lr_0 = 8.0911e-04
Loss = 1.5587e-01, PNorm = 49.1395, GNorm = 0.8486, lr_0 = 8.0855e-04
Loss = 1.4354e-01, PNorm = 49.1540, GNorm = 2.1126, lr_0 = 8.0800e-04
Loss = 1.3944e-01, PNorm = 49.1745, GNorm = 0.6917, lr_0 = 8.0745e-04
Loss = 1.5968e-01, PNorm = 49.1953, GNorm = 2.2074, lr_0 = 8.0689e-04
Loss = 1.5199e-01, PNorm = 49.2189, GNorm = 1.1355, lr_0 = 8.0634e-04
Loss = 1.5187e-01, PNorm = 49.2386, GNorm = 0.7649, lr_0 = 8.0579e-04
Loss = 1.5288e-01, PNorm = 49.2607, GNorm = 1.4102, lr_0 = 8.0523e-04
Loss = 1.5263e-01, PNorm = 49.2849, GNorm = 1.1295, lr_0 = 8.0468e-04
Loss = 1.6940e-01, PNorm = 49.3054, GNorm = 1.0791, lr_0 = 8.0413e-04
Loss = 1.3443e-01, PNorm = 49.3271, GNorm = 0.8189, lr_0 = 8.0358e-04
Loss = 1.6973e-01, PNorm = 49.3498, GNorm = 1.3950, lr_0 = 8.0303e-04
Loss = 1.3810e-01, PNorm = 49.3682, GNorm = 0.7946, lr_0 = 8.0248e-04
Loss = 1.3787e-01, PNorm = 49.3858, GNorm = 1.0125, lr_0 = 8.0193e-04
Loss = 1.3705e-01, PNorm = 49.4056, GNorm = 0.7734, lr_0 = 8.0138e-04
Loss = 1.6132e-01, PNorm = 49.4230, GNorm = 0.6932, lr_0 = 8.0083e-04
Loss = 1.7821e-01, PNorm = 49.4503, GNorm = 1.4570, lr_0 = 8.0028e-04
Loss = 1.5929e-01, PNorm = 49.4759, GNorm = 1.1251, lr_0 = 7.9974e-04
Loss = 1.3876e-01, PNorm = 49.5035, GNorm = 2.4232, lr_0 = 7.9919e-04
Loss = 1.4376e-01, PNorm = 49.5142, GNorm = 1.0487, lr_0 = 7.9864e-04
Loss = 1.6328e-01, PNorm = 49.5341, GNorm = 1.4569, lr_0 = 7.9809e-04
Loss = 1.5143e-01, PNorm = 49.5547, GNorm = 0.8393, lr_0 = 7.9755e-04
Loss = 1.6090e-01, PNorm = 49.5792, GNorm = 0.7275, lr_0 = 7.9700e-04
Loss = 1.4987e-01, PNorm = 49.5941, GNorm = 0.9353, lr_0 = 7.9645e-04
Loss = 1.5430e-01, PNorm = 49.6142, GNorm = 1.9563, lr_0 = 7.9591e-04
Loss = 1.7996e-01, PNorm = 49.6340, GNorm = 1.1653, lr_0 = 7.9536e-04
Loss = 1.6340e-01, PNorm = 49.6559, GNorm = 1.4999, lr_0 = 7.9482e-04
Loss = 1.5646e-01, PNorm = 49.6809, GNorm = 0.6921, lr_0 = 7.9427e-04
Loss = 1.3838e-01, PNorm = 49.6978, GNorm = 0.6148, lr_0 = 7.9373e-04
Loss = 1.5525e-01, PNorm = 49.7158, GNorm = 2.3408, lr_0 = 7.9319e-04
Loss = 1.5040e-01, PNorm = 49.7301, GNorm = 0.6523, lr_0 = 7.9264e-04
Loss = 1.4043e-01, PNorm = 49.7453, GNorm = 0.8401, lr_0 = 7.9210e-04
Loss = 1.4101e-01, PNorm = 49.7568, GNorm = 1.0034, lr_0 = 7.9156e-04
Loss = 1.3102e-01, PNorm = 49.7775, GNorm = 0.8053, lr_0 = 7.9101e-04
Loss = 1.3448e-01, PNorm = 49.7947, GNorm = 0.6874, lr_0 = 7.9047e-04
Loss = 1.4243e-01, PNorm = 49.8126, GNorm = 1.5158, lr_0 = 7.8993e-04
Loss = 1.3326e-01, PNorm = 49.8283, GNorm = 1.1223, lr_0 = 7.8939e-04
Loss = 1.5329e-01, PNorm = 49.8480, GNorm = 0.7996, lr_0 = 7.8885e-04
Loss = 1.3886e-01, PNorm = 49.8651, GNorm = 0.7058, lr_0 = 7.8831e-04
Loss = 1.6098e-01, PNorm = 49.8773, GNorm = 0.9342, lr_0 = 7.8777e-04
Loss = 1.5174e-01, PNorm = 49.8966, GNorm = 2.1179, lr_0 = 7.8723e-04
Loss = 1.5958e-01, PNorm = 49.9218, GNorm = 1.5040, lr_0 = 7.8669e-04
Loss = 1.4890e-01, PNorm = 49.9462, GNorm = 1.1551, lr_0 = 7.8615e-04
Loss = 1.5730e-01, PNorm = 49.9597, GNorm = 2.2740, lr_0 = 7.8561e-04
Loss = 1.3584e-01, PNorm = 49.9785, GNorm = 2.3079, lr_0 = 7.8507e-04
Loss = 1.3465e-01, PNorm = 49.9988, GNorm = 2.0871, lr_0 = 7.8454e-04
Loss = 1.7084e-01, PNorm = 50.0177, GNorm = 1.5815, lr_0 = 7.8400e-04
Loss = 1.5082e-01, PNorm = 50.0399, GNorm = 1.8604, lr_0 = 7.8346e-04
Loss = 1.4394e-01, PNorm = 50.0657, GNorm = 0.7409, lr_0 = 7.8293e-04
Loss = 1.6137e-01, PNorm = 50.0843, GNorm = 0.8907, lr_0 = 7.8239e-04
Loss = 1.4063e-01, PNorm = 50.1016, GNorm = 0.8103, lr_0 = 7.8185e-04
Loss = 1.4200e-01, PNorm = 50.1148, GNorm = 3.0965, lr_0 = 7.8132e-04
Validation mae = 0.467827
Epoch 5
Loss = 1.3914e-01, PNorm = 50.1351, GNorm = 0.9392, lr_0 = 7.8078e-04
Loss = 1.4679e-01, PNorm = 50.1601, GNorm = 0.5979, lr_0 = 7.8025e-04
Loss = 1.5965e-01, PNorm = 50.1829, GNorm = 1.7599, lr_0 = 7.7971e-04
Loss = 1.2517e-01, PNorm = 50.2098, GNorm = 0.8335, lr_0 = 7.7918e-04
Loss = 1.2487e-01, PNorm = 50.2261, GNorm = 1.0871, lr_0 = 7.7864e-04
Loss = 1.3270e-01, PNorm = 50.2458, GNorm = 2.3383, lr_0 = 7.7811e-04
Loss = 1.6176e-01, PNorm = 50.2736, GNorm = 1.8633, lr_0 = 7.7758e-04
Loss = 1.4194e-01, PNorm = 50.2935, GNorm = 1.8502, lr_0 = 7.7705e-04
Loss = 1.3609e-01, PNorm = 50.3135, GNorm = 1.2691, lr_0 = 7.7651e-04
Loss = 1.4990e-01, PNorm = 50.3343, GNorm = 2.1186, lr_0 = 7.7598e-04
Loss = 1.3380e-01, PNorm = 50.3564, GNorm = 1.0424, lr_0 = 7.7545e-04
Loss = 1.4764e-01, PNorm = 50.3809, GNorm = 1.5285, lr_0 = 7.7492e-04
Loss = 1.5004e-01, PNorm = 50.3969, GNorm = 1.0784, lr_0 = 7.7439e-04
Loss = 1.3103e-01, PNorm = 50.4153, GNorm = 0.6752, lr_0 = 7.7386e-04
Loss = 1.2871e-01, PNorm = 50.4302, GNorm = 1.2283, lr_0 = 7.7333e-04
Loss = 1.3182e-01, PNorm = 50.4403, GNorm = 1.3935, lr_0 = 7.7280e-04
Loss = 1.5662e-01, PNorm = 50.4603, GNorm = 0.7977, lr_0 = 7.7227e-04
Loss = 1.4218e-01, PNorm = 50.4839, GNorm = 0.7614, lr_0 = 7.7174e-04
Loss = 1.3987e-01, PNorm = 50.4981, GNorm = 0.7850, lr_0 = 7.7121e-04
Loss = 1.4719e-01, PNorm = 50.5170, GNorm = 0.7824, lr_0 = 7.7068e-04
Loss = 1.1635e-01, PNorm = 50.5346, GNorm = 0.9488, lr_0 = 7.7015e-04
Loss = 1.2386e-01, PNorm = 50.5579, GNorm = 0.6846, lr_0 = 7.6963e-04
Loss = 1.5438e-01, PNorm = 50.5804, GNorm = 2.1142, lr_0 = 7.6910e-04
Loss = 1.4429e-01, PNorm = 50.6024, GNorm = 0.7526, lr_0 = 7.6857e-04
Loss = 1.6515e-01, PNorm = 50.6229, GNorm = 1.1135, lr_0 = 7.6805e-04
Loss = 1.6989e-01, PNorm = 50.6532, GNorm = 3.2520, lr_0 = 7.6752e-04
Loss = 1.6005e-01, PNorm = 50.6783, GNorm = 1.2545, lr_0 = 7.6699e-04
Loss = 1.3925e-01, PNorm = 50.6957, GNorm = 0.6759, lr_0 = 7.6647e-04
Loss = 1.3787e-01, PNorm = 50.7150, GNorm = 0.6114, lr_0 = 7.6594e-04
Loss = 1.4100e-01, PNorm = 50.7333, GNorm = 1.8715, lr_0 = 7.6542e-04
Loss = 1.3398e-01, PNorm = 50.7581, GNorm = 1.0858, lr_0 = 7.6489e-04
Loss = 1.2231e-01, PNorm = 50.7738, GNorm = 0.9633, lr_0 = 7.6437e-04
Loss = 1.0987e-01, PNorm = 50.7848, GNorm = 1.1531, lr_0 = 7.6385e-04
Loss = 1.3020e-01, PNorm = 50.8019, GNorm = 1.8011, lr_0 = 7.6332e-04
Loss = 1.2433e-01, PNorm = 50.8243, GNorm = 0.6213, lr_0 = 7.6280e-04
Loss = 1.2886e-01, PNorm = 50.8432, GNorm = 1.0458, lr_0 = 7.6228e-04
Loss = 1.4212e-01, PNorm = 50.8506, GNorm = 0.8912, lr_0 = 7.6176e-04
Loss = 1.2307e-01, PNorm = 50.8719, GNorm = 1.2553, lr_0 = 7.6123e-04
Loss = 1.4478e-01, PNorm = 50.8948, GNorm = 1.8007, lr_0 = 7.6071e-04
Loss = 1.3523e-01, PNorm = 50.9128, GNorm = 0.6885, lr_0 = 7.6019e-04
Loss = 1.5033e-01, PNorm = 50.9370, GNorm = 0.9209, lr_0 = 7.5967e-04
Loss = 1.5730e-01, PNorm = 50.9596, GNorm = 1.1557, lr_0 = 7.5915e-04
Loss = 1.2559e-01, PNorm = 50.9676, GNorm = 2.1200, lr_0 = 7.5863e-04
Loss = 1.4650e-01, PNorm = 50.9759, GNorm = 1.1411, lr_0 = 7.5811e-04
Loss = 1.4716e-01, PNorm = 50.9977, GNorm = 0.7772, lr_0 = 7.5759e-04
Loss = 1.4827e-01, PNorm = 51.0185, GNorm = 2.9448, lr_0 = 7.5707e-04
Loss = 1.2309e-01, PNorm = 51.0335, GNorm = 0.9315, lr_0 = 7.5655e-04
Loss = 1.2547e-01, PNorm = 51.0549, GNorm = 0.7767, lr_0 = 7.5603e-04
Loss = 1.4905e-01, PNorm = 51.0697, GNorm = 1.9735, lr_0 = 7.5552e-04
Loss = 1.3758e-01, PNorm = 51.0866, GNorm = 0.9219, lr_0 = 7.5500e-04
Loss = 1.3909e-01, PNorm = 51.1032, GNorm = 1.0995, lr_0 = 7.5448e-04
Loss = 1.3767e-01, PNorm = 51.1233, GNorm = 0.7044, lr_0 = 7.5397e-04
Loss = 1.6259e-01, PNorm = 51.1488, GNorm = 2.9813, lr_0 = 7.5345e-04
Loss = 1.6305e-01, PNorm = 51.1739, GNorm = 0.7418, lr_0 = 7.5293e-04
Loss = 1.5495e-01, PNorm = 51.2005, GNorm = 1.5261, lr_0 = 7.5242e-04
Loss = 1.4066e-01, PNorm = 51.2246, GNorm = 0.7579, lr_0 = 7.5190e-04
Loss = 1.5082e-01, PNorm = 51.2542, GNorm = 3.1769, lr_0 = 7.5139e-04
Loss = 1.3351e-01, PNorm = 51.2882, GNorm = 1.3372, lr_0 = 7.5087e-04
Loss = 1.4275e-01, PNorm = 51.3121, GNorm = 1.3960, lr_0 = 7.5036e-04
Loss = 1.4267e-01, PNorm = 51.3354, GNorm = 1.2979, lr_0 = 7.4984e-04
Loss = 1.5976e-01, PNorm = 51.3572, GNorm = 0.8770, lr_0 = 7.4933e-04
Loss = 1.4735e-01, PNorm = 51.3810, GNorm = 0.7058, lr_0 = 7.4882e-04
Loss = 1.4974e-01, PNorm = 51.4116, GNorm = 0.6955, lr_0 = 7.4830e-04
Loss = 1.3048e-01, PNorm = 51.4366, GNorm = 1.2531, lr_0 = 7.4779e-04
Loss = 1.3213e-01, PNorm = 51.4566, GNorm = 0.8140, lr_0 = 7.4728e-04
Loss = 1.2432e-01, PNorm = 51.4671, GNorm = 0.6996, lr_0 = 7.4677e-04
Loss = 1.4426e-01, PNorm = 51.4798, GNorm = 0.6780, lr_0 = 7.4625e-04
Loss = 1.2463e-01, PNorm = 51.4960, GNorm = 1.1601, lr_0 = 7.4574e-04
Loss = 1.1393e-01, PNorm = 51.5086, GNorm = 0.5950, lr_0 = 7.4523e-04
Loss = 1.4286e-01, PNorm = 51.5205, GNorm = 2.5866, lr_0 = 7.4472e-04
Loss = 1.2587e-01, PNorm = 51.5366, GNorm = 0.8992, lr_0 = 7.4421e-04
Loss = 1.2369e-01, PNorm = 51.5524, GNorm = 0.8159, lr_0 = 7.4370e-04
Loss = 1.2383e-01, PNorm = 51.5703, GNorm = 0.8358, lr_0 = 7.4319e-04
Loss = 1.5688e-01, PNorm = 51.5804, GNorm = 1.3943, lr_0 = 7.4268e-04
Loss = 1.2067e-01, PNorm = 51.6018, GNorm = 1.1853, lr_0 = 7.4217e-04
Loss = 1.2517e-01, PNorm = 51.6212, GNorm = 0.4751, lr_0 = 7.4167e-04
Loss = 1.4997e-01, PNorm = 51.6402, GNorm = 0.7089, lr_0 = 7.4116e-04
Loss = 1.4594e-01, PNorm = 51.6576, GNorm = 1.7147, lr_0 = 7.4065e-04
Loss = 1.4354e-01, PNorm = 51.6776, GNorm = 0.7339, lr_0 = 7.4014e-04
Loss = 1.2806e-01, PNorm = 51.6978, GNorm = 0.7538, lr_0 = 7.3964e-04
Loss = 1.2738e-01, PNorm = 51.7162, GNorm = 0.9003, lr_0 = 7.3913e-04
Loss = 1.2137e-01, PNorm = 51.7337, GNorm = 0.7966, lr_0 = 7.3862e-04
Loss = 1.1971e-01, PNorm = 51.7491, GNorm = 1.2646, lr_0 = 7.3812e-04
Loss = 1.2272e-01, PNorm = 51.7667, GNorm = 0.8941, lr_0 = 7.3761e-04
Loss = 1.4829e-01, PNorm = 51.7826, GNorm = 1.0197, lr_0 = 7.3711e-04
Loss = 1.3135e-01, PNorm = 51.8057, GNorm = 1.2601, lr_0 = 7.3660e-04
Loss = 1.3760e-01, PNorm = 51.8255, GNorm = 1.0889, lr_0 = 7.3610e-04
Loss = 1.4641e-01, PNorm = 51.8396, GNorm = 1.8334, lr_0 = 7.3559e-04
Loss = 1.4304e-01, PNorm = 51.8583, GNorm = 1.2338, lr_0 = 7.3509e-04
Loss = 1.3823e-01, PNorm = 51.8705, GNorm = 0.7790, lr_0 = 7.3458e-04
Loss = 1.6435e-01, PNorm = 51.8855, GNorm = 1.3356, lr_0 = 7.3408e-04
Loss = 1.3217e-01, PNorm = 51.9072, GNorm = 1.6799, lr_0 = 7.3358e-04
Loss = 1.3064e-01, PNorm = 51.9218, GNorm = 0.5177, lr_0 = 7.3308e-04
Loss = 1.5303e-01, PNorm = 51.9358, GNorm = 0.7816, lr_0 = 7.3257e-04
Loss = 1.7957e-01, PNorm = 51.9686, GNorm = 2.3346, lr_0 = 7.3207e-04
Loss = 1.2316e-01, PNorm = 51.9999, GNorm = 0.7014, lr_0 = 7.3157e-04
Loss = 1.4265e-01, PNorm = 52.0174, GNorm = 0.8796, lr_0 = 7.3107e-04
Loss = 1.3810e-01, PNorm = 52.0368, GNorm = 1.0245, lr_0 = 7.3057e-04
Loss = 1.4122e-01, PNorm = 52.0596, GNorm = 0.7072, lr_0 = 7.3007e-04
Loss = 1.3739e-01, PNorm = 52.0768, GNorm = 0.7985, lr_0 = 7.2957e-04
Loss = 1.3710e-01, PNorm = 52.0918, GNorm = 1.9738, lr_0 = 7.2907e-04
Loss = 1.2251e-01, PNorm = 52.1097, GNorm = 0.7555, lr_0 = 7.2857e-04
Loss = 1.5389e-01, PNorm = 52.1270, GNorm = 1.7370, lr_0 = 7.2807e-04
Loss = 1.3754e-01, PNorm = 52.1369, GNorm = 0.5497, lr_0 = 7.2757e-04
Loss = 1.1585e-01, PNorm = 52.1536, GNorm = 0.7434, lr_0 = 7.2707e-04
Loss = 1.2771e-01, PNorm = 52.1728, GNorm = 0.7695, lr_0 = 7.2657e-04
Loss = 1.3471e-01, PNorm = 52.1867, GNorm = 0.7542, lr_0 = 7.2608e-04
Loss = 1.5446e-01, PNorm = 52.2093, GNorm = 0.9818, lr_0 = 7.2558e-04
Loss = 1.4215e-01, PNorm = 52.2310, GNorm = 0.8018, lr_0 = 7.2508e-04
Loss = 1.4216e-01, PNorm = 52.2418, GNorm = 1.1809, lr_0 = 7.2458e-04
Loss = 1.4285e-01, PNorm = 52.2583, GNorm = 0.9971, lr_0 = 7.2409e-04
Loss = 1.3766e-01, PNorm = 52.2836, GNorm = 1.1691, lr_0 = 7.2359e-04
Loss = 1.5425e-01, PNorm = 52.3097, GNorm = 0.6446, lr_0 = 7.2310e-04
Loss = 1.3666e-01, PNorm = 52.3186, GNorm = 1.5184, lr_0 = 7.2260e-04
Loss = 1.4439e-01, PNorm = 52.3350, GNorm = 0.8526, lr_0 = 7.2211e-04
Loss = 1.3634e-01, PNorm = 52.3546, GNorm = 0.8810, lr_0 = 7.2161e-04
Loss = 1.4056e-01, PNorm = 52.3758, GNorm = 1.1961, lr_0 = 7.2112e-04
Loss = 1.4126e-01, PNorm = 52.3958, GNorm = 1.3289, lr_0 = 7.2062e-04
Loss = 1.1465e-01, PNorm = 52.4089, GNorm = 0.7768, lr_0 = 7.2013e-04
Loss = 1.4651e-01, PNorm = 52.4255, GNorm = 2.0140, lr_0 = 7.1964e-04
Validation mae = 0.462402
Epoch 6
Loss = 1.2921e-01, PNorm = 52.4489, GNorm = 2.1900, lr_0 = 7.1914e-04
Loss = 1.3977e-01, PNorm = 52.4760, GNorm = 1.0325, lr_0 = 7.1865e-04
Loss = 1.2504e-01, PNorm = 52.4953, GNorm = 1.1705, lr_0 = 7.1816e-04
Loss = 1.2960e-01, PNorm = 52.5150, GNorm = 1.7494, lr_0 = 7.1767e-04
Loss = 1.4850e-01, PNorm = 52.5388, GNorm = 0.6687, lr_0 = 7.1717e-04
Loss = 1.3096e-01, PNorm = 52.5578, GNorm = 1.0466, lr_0 = 7.1668e-04
Loss = 1.3027e-01, PNorm = 52.5811, GNorm = 0.5434, lr_0 = 7.1619e-04
Loss = 1.2702e-01, PNorm = 52.6006, GNorm = 1.6723, lr_0 = 7.1570e-04
Loss = 1.1845e-01, PNorm = 52.6199, GNorm = 0.6327, lr_0 = 7.1521e-04
Loss = 1.1580e-01, PNorm = 52.6352, GNorm = 1.8289, lr_0 = 7.1472e-04
Loss = 1.1495e-01, PNorm = 52.6581, GNorm = 1.3442, lr_0 = 7.1423e-04
Loss = 1.2552e-01, PNorm = 52.6743, GNorm = 0.6688, lr_0 = 7.1374e-04
Loss = 1.4336e-01, PNorm = 52.6897, GNorm = 0.7086, lr_0 = 7.1325e-04
Loss = 1.4162e-01, PNorm = 52.7077, GNorm = 0.8606, lr_0 = 7.1277e-04
Loss = 1.3596e-01, PNorm = 52.7297, GNorm = 0.7173, lr_0 = 7.1228e-04
Loss = 1.3435e-01, PNorm = 52.7496, GNorm = 1.1553, lr_0 = 7.1179e-04
Loss = 1.1122e-01, PNorm = 52.7722, GNorm = 1.0705, lr_0 = 7.1130e-04
Loss = 1.2273e-01, PNorm = 52.7925, GNorm = 1.0977, lr_0 = 7.1081e-04
Loss = 1.0933e-01, PNorm = 52.8088, GNorm = 0.7834, lr_0 = 7.1033e-04
Loss = 1.3045e-01, PNorm = 52.8226, GNorm = 0.6581, lr_0 = 7.0984e-04
Loss = 1.1595e-01, PNorm = 52.8398, GNorm = 1.3112, lr_0 = 7.0935e-04
Loss = 1.4518e-01, PNorm = 52.8587, GNorm = 0.8029, lr_0 = 7.0887e-04
Loss = 1.2906e-01, PNorm = 52.8719, GNorm = 0.6731, lr_0 = 7.0838e-04
Loss = 1.2154e-01, PNorm = 52.8912, GNorm = 0.5423, lr_0 = 7.0790e-04
Loss = 1.0926e-01, PNorm = 52.9102, GNorm = 0.8647, lr_0 = 7.0741e-04
Loss = 1.1264e-01, PNorm = 52.9200, GNorm = 0.8168, lr_0 = 7.0693e-04
Loss = 1.1682e-01, PNorm = 52.9392, GNorm = 2.1907, lr_0 = 7.0644e-04
Loss = 1.4962e-01, PNorm = 52.9658, GNorm = 1.5943, lr_0 = 7.0596e-04
Loss = 1.1452e-01, PNorm = 52.9889, GNorm = 0.6375, lr_0 = 7.0548e-04
Loss = 1.1715e-01, PNorm = 53.0085, GNorm = 0.6837, lr_0 = 7.0499e-04
Loss = 1.1437e-01, PNorm = 53.0260, GNorm = 0.4705, lr_0 = 7.0451e-04
Loss = 1.4117e-01, PNorm = 53.0436, GNorm = 0.9414, lr_0 = 7.0403e-04
Loss = 1.1380e-01, PNorm = 53.0559, GNorm = 1.4015, lr_0 = 7.0354e-04
Loss = 1.4311e-01, PNorm = 53.0622, GNorm = 0.9652, lr_0 = 7.0306e-04
Loss = 1.3014e-01, PNorm = 53.0791, GNorm = 0.7640, lr_0 = 7.0258e-04
Loss = 1.2826e-01, PNorm = 53.0973, GNorm = 1.3083, lr_0 = 7.0210e-04
Loss = 1.1454e-01, PNorm = 53.1095, GNorm = 1.1741, lr_0 = 7.0162e-04
Loss = 1.2895e-01, PNorm = 53.1237, GNorm = 0.7915, lr_0 = 7.0114e-04
Loss = 1.3073e-01, PNorm = 53.1394, GNorm = 0.6948, lr_0 = 7.0066e-04
Loss = 1.1088e-01, PNorm = 53.1513, GNorm = 0.5668, lr_0 = 7.0018e-04
Loss = 1.2240e-01, PNorm = 53.1696, GNorm = 1.0562, lr_0 = 6.9970e-04
Loss = 1.3140e-01, PNorm = 53.1912, GNorm = 0.4950, lr_0 = 6.9922e-04
Loss = 1.2906e-01, PNorm = 53.2090, GNorm = 1.2489, lr_0 = 6.9874e-04
Loss = 1.4644e-01, PNorm = 53.2280, GNorm = 1.5512, lr_0 = 6.9826e-04
Loss = 1.1511e-01, PNorm = 53.2537, GNorm = 0.7380, lr_0 = 6.9778e-04
Loss = 1.1480e-01, PNorm = 53.2750, GNorm = 0.7170, lr_0 = 6.9730e-04
Loss = 1.2831e-01, PNorm = 53.2974, GNorm = 1.3714, lr_0 = 6.9683e-04
Loss = 1.3831e-01, PNorm = 53.3141, GNorm = 1.8877, lr_0 = 6.9635e-04
Loss = 1.4515e-01, PNorm = 53.3352, GNorm = 1.6881, lr_0 = 6.9587e-04
Loss = 1.4139e-01, PNorm = 53.3582, GNorm = 0.8785, lr_0 = 6.9540e-04
Loss = 1.2872e-01, PNorm = 53.3758, GNorm = 0.8211, lr_0 = 6.9492e-04
Loss = 1.2306e-01, PNorm = 53.3974, GNorm = 0.9794, lr_0 = 6.9444e-04
Loss = 1.3880e-01, PNorm = 53.4205, GNorm = 1.9669, lr_0 = 6.9397e-04
Loss = 1.2994e-01, PNorm = 53.4398, GNorm = 1.0275, lr_0 = 6.9349e-04
Loss = 1.2505e-01, PNorm = 53.4567, GNorm = 0.6212, lr_0 = 6.9302e-04
Loss = 1.1230e-01, PNorm = 53.4727, GNorm = 0.7562, lr_0 = 6.9254e-04
Loss = 1.1729e-01, PNorm = 53.4889, GNorm = 0.5054, lr_0 = 6.9207e-04
Loss = 1.2734e-01, PNorm = 53.4990, GNorm = 0.9595, lr_0 = 6.9159e-04
Loss = 1.2585e-01, PNorm = 53.5126, GNorm = 1.1800, lr_0 = 6.9112e-04
Loss = 1.3000e-01, PNorm = 53.5301, GNorm = 1.7167, lr_0 = 6.9065e-04
Loss = 1.2400e-01, PNorm = 53.5487, GNorm = 1.0938, lr_0 = 6.9017e-04
Loss = 1.1700e-01, PNorm = 53.5590, GNorm = 1.7049, lr_0 = 6.8970e-04
Loss = 1.2354e-01, PNorm = 53.5733, GNorm = 0.8646, lr_0 = 6.8923e-04
Loss = 1.2544e-01, PNorm = 53.5865, GNorm = 1.5986, lr_0 = 6.8876e-04
Loss = 1.1868e-01, PNorm = 53.6060, GNorm = 1.0942, lr_0 = 6.8828e-04
Loss = 1.1904e-01, PNorm = 53.6318, GNorm = 0.7681, lr_0 = 6.8781e-04
Loss = 1.1689e-01, PNorm = 53.6505, GNorm = 0.6069, lr_0 = 6.8734e-04
Loss = 1.2724e-01, PNorm = 53.6692, GNorm = 0.6288, lr_0 = 6.8687e-04
Loss = 1.4387e-01, PNorm = 53.6854, GNorm = 0.9449, lr_0 = 6.8640e-04
Loss = 1.2032e-01, PNorm = 53.7018, GNorm = 2.0930, lr_0 = 6.8593e-04
Loss = 1.3240e-01, PNorm = 53.7172, GNorm = 0.6247, lr_0 = 6.8546e-04
Loss = 1.4682e-01, PNorm = 53.7346, GNorm = 0.8194, lr_0 = 6.8499e-04
Loss = 1.3391e-01, PNorm = 53.7484, GNorm = 2.0659, lr_0 = 6.8452e-04
Loss = 1.2956e-01, PNorm = 53.7647, GNorm = 0.8650, lr_0 = 6.8405e-04
Loss = 1.6043e-01, PNorm = 53.7856, GNorm = 2.0627, lr_0 = 6.8358e-04
Loss = 1.3711e-01, PNorm = 53.8141, GNorm = 1.6874, lr_0 = 6.8312e-04
Loss = 1.2427e-01, PNorm = 53.8397, GNorm = 1.0394, lr_0 = 6.8265e-04
Loss = 1.4514e-01, PNorm = 53.8639, GNorm = 0.8729, lr_0 = 6.8218e-04
Loss = 1.2730e-01, PNorm = 53.8823, GNorm = 0.9924, lr_0 = 6.8171e-04
Loss = 1.0768e-01, PNorm = 53.9073, GNorm = 0.6383, lr_0 = 6.8125e-04
Loss = 1.1385e-01, PNorm = 53.9351, GNorm = 0.7897, lr_0 = 6.8078e-04
Loss = 1.5821e-01, PNorm = 53.9548, GNorm = 1.7340, lr_0 = 6.8031e-04
Loss = 1.3347e-01, PNorm = 53.9756, GNorm = 1.8250, lr_0 = 6.7985e-04
Loss = 1.2513e-01, PNorm = 53.9840, GNorm = 1.8016, lr_0 = 6.7938e-04
Loss = 1.5214e-01, PNorm = 54.0007, GNorm = 1.7689, lr_0 = 6.7892e-04
Loss = 1.1852e-01, PNorm = 54.0277, GNorm = 0.5765, lr_0 = 6.7845e-04
Loss = 1.1962e-01, PNorm = 54.0566, GNorm = 0.4991, lr_0 = 6.7799e-04
Loss = 1.2723e-01, PNorm = 54.0744, GNorm = 1.7763, lr_0 = 6.7752e-04
Loss = 1.2206e-01, PNorm = 54.0870, GNorm = 1.8599, lr_0 = 6.7706e-04
Loss = 1.4444e-01, PNorm = 54.1040, GNorm = 0.5671, lr_0 = 6.7659e-04
Loss = 1.0919e-01, PNorm = 54.1248, GNorm = 1.1883, lr_0 = 6.7613e-04
Loss = 1.4886e-01, PNorm = 54.1383, GNorm = 1.6843, lr_0 = 6.7567e-04
Loss = 1.5053e-01, PNorm = 54.1480, GNorm = 3.1296, lr_0 = 6.7520e-04
Loss = 1.4638e-01, PNorm = 54.1736, GNorm = 1.4692, lr_0 = 6.7474e-04
Loss = 1.1799e-01, PNorm = 54.1979, GNorm = 1.5936, lr_0 = 6.7428e-04
Loss = 1.2396e-01, PNorm = 54.2099, GNorm = 0.9881, lr_0 = 6.7382e-04
Loss = 1.4683e-01, PNorm = 54.2150, GNorm = 1.1491, lr_0 = 6.7335e-04
Loss = 1.1466e-01, PNorm = 54.2262, GNorm = 1.0254, lr_0 = 6.7289e-04
Loss = 1.3563e-01, PNorm = 54.2482, GNorm = 0.8755, lr_0 = 6.7243e-04
Loss = 1.3827e-01, PNorm = 54.2610, GNorm = 1.0432, lr_0 = 6.7197e-04
Loss = 1.3223e-01, PNorm = 54.2727, GNorm = 0.8178, lr_0 = 6.7151e-04
Loss = 1.3423e-01, PNorm = 54.2835, GNorm = 0.7180, lr_0 = 6.7105e-04
Loss = 1.4273e-01, PNorm = 54.3047, GNorm = 1.2773, lr_0 = 6.7059e-04
Loss = 1.1806e-01, PNorm = 54.3219, GNorm = 1.1826, lr_0 = 6.7013e-04
Loss = 1.1799e-01, PNorm = 54.3353, GNorm = 0.9299, lr_0 = 6.6967e-04
Loss = 1.1235e-01, PNorm = 54.3488, GNorm = 1.0098, lr_0 = 6.6921e-04
Loss = 1.1530e-01, PNorm = 54.3679, GNorm = 0.8826, lr_0 = 6.6876e-04
Loss = 1.2127e-01, PNorm = 54.3784, GNorm = 0.5837, lr_0 = 6.6830e-04
Loss = 1.3581e-01, PNorm = 54.3938, GNorm = 0.6605, lr_0 = 6.6784e-04
Loss = 1.2375e-01, PNorm = 54.4097, GNorm = 1.1045, lr_0 = 6.6738e-04
Loss = 1.2699e-01, PNorm = 54.4318, GNorm = 0.8947, lr_0 = 6.6693e-04
Loss = 1.2227e-01, PNorm = 54.4485, GNorm = 1.5661, lr_0 = 6.6647e-04
Loss = 1.3796e-01, PNorm = 54.4680, GNorm = 0.7984, lr_0 = 6.6601e-04
Loss = 1.3604e-01, PNorm = 54.4858, GNorm = 1.3113, lr_0 = 6.6556e-04
Loss = 1.2270e-01, PNorm = 54.5040, GNorm = 0.6565, lr_0 = 6.6510e-04
Loss = 1.1907e-01, PNorm = 54.5216, GNorm = 0.8348, lr_0 = 6.6464e-04
Loss = 1.2451e-01, PNorm = 54.5437, GNorm = 0.6362, lr_0 = 6.6419e-04
Loss = 1.0930e-01, PNorm = 54.5605, GNorm = 0.6114, lr_0 = 6.6373e-04
Loss = 1.3025e-01, PNorm = 54.5780, GNorm = 0.9545, lr_0 = 6.6328e-04
Loss = 1.3657e-01, PNorm = 54.5950, GNorm = 1.5385, lr_0 = 6.6282e-04
Validation mae = 0.439641
Epoch 7
Loss = 1.1269e-01, PNorm = 54.6077, GNorm = 1.2781, lr_0 = 6.6237e-04
Loss = 1.5953e-01, PNorm = 54.6351, GNorm = 1.6836, lr_0 = 6.6192e-04
Loss = 1.2049e-01, PNorm = 54.6615, GNorm = 0.9772, lr_0 = 6.6146e-04
Loss = 1.2365e-01, PNorm = 54.6842, GNorm = 1.5989, lr_0 = 6.6101e-04
Loss = 1.1490e-01, PNorm = 54.7115, GNorm = 1.2376, lr_0 = 6.6056e-04
Loss = 1.0406e-01, PNorm = 54.7294, GNorm = 0.6553, lr_0 = 6.6011e-04
Loss = 1.1159e-01, PNorm = 54.7384, GNorm = 0.6293, lr_0 = 6.5965e-04
Loss = 1.0891e-01, PNorm = 54.7534, GNorm = 1.1824, lr_0 = 6.5920e-04
Loss = 1.0381e-01, PNorm = 54.7659, GNorm = 0.7232, lr_0 = 6.5875e-04
Loss = 1.0104e-01, PNorm = 54.7873, GNorm = 0.6147, lr_0 = 6.5830e-04
Loss = 1.0767e-01, PNorm = 54.7960, GNorm = 0.7357, lr_0 = 6.5785e-04
Loss = 1.0854e-01, PNorm = 54.8080, GNorm = 0.9368, lr_0 = 6.5740e-04
Loss = 1.0955e-01, PNorm = 54.8242, GNorm = 0.6662, lr_0 = 6.5695e-04
Loss = 1.0335e-01, PNorm = 54.8432, GNorm = 0.8004, lr_0 = 6.5650e-04
Loss = 9.4968e-02, PNorm = 54.8540, GNorm = 0.7679, lr_0 = 6.5605e-04
Loss = 1.1555e-01, PNorm = 54.8707, GNorm = 0.7479, lr_0 = 6.5560e-04
Loss = 1.0885e-01, PNorm = 54.8873, GNorm = 0.9667, lr_0 = 6.5515e-04
Loss = 1.2738e-01, PNorm = 54.8995, GNorm = 1.6077, lr_0 = 6.5470e-04
Loss = 1.0199e-01, PNorm = 54.9139, GNorm = 0.7980, lr_0 = 6.5425e-04
Loss = 1.3278e-01, PNorm = 54.9281, GNorm = 0.6861, lr_0 = 6.5380e-04
Loss = 1.2113e-01, PNorm = 54.9460, GNorm = 0.5607, lr_0 = 6.5335e-04
Loss = 1.3491e-01, PNorm = 54.9643, GNorm = 1.4433, lr_0 = 6.5291e-04
Loss = 1.1096e-01, PNorm = 54.9838, GNorm = 0.8114, lr_0 = 6.5246e-04
Loss = 1.0091e-01, PNorm = 54.9993, GNorm = 0.5311, lr_0 = 6.5201e-04
Loss = 1.5278e-01, PNorm = 55.0188, GNorm = 0.7154, lr_0 = 6.5157e-04
Loss = 1.1127e-01, PNorm = 55.0393, GNorm = 0.6121, lr_0 = 6.5112e-04
Loss = 1.2114e-01, PNorm = 55.0482, GNorm = 1.5996, lr_0 = 6.5067e-04
Loss = 1.2300e-01, PNorm = 55.0652, GNorm = 0.7639, lr_0 = 6.5023e-04
Loss = 1.0630e-01, PNorm = 55.0889, GNorm = 0.9556, lr_0 = 6.4978e-04
Loss = 1.2121e-01, PNorm = 55.1053, GNorm = 0.9571, lr_0 = 6.4934e-04
Loss = 1.0606e-01, PNorm = 55.1185, GNorm = 1.2582, lr_0 = 6.4889e-04
Loss = 1.5105e-01, PNorm = 55.1365, GNorm = 1.0415, lr_0 = 6.4845e-04
Loss = 1.2979e-01, PNorm = 55.1696, GNorm = 0.8630, lr_0 = 6.4800e-04
Loss = 1.0993e-01, PNorm = 55.1969, GNorm = 2.0920, lr_0 = 6.4756e-04
Loss = 1.0854e-01, PNorm = 55.2146, GNorm = 0.8856, lr_0 = 6.4712e-04
Loss = 1.3920e-01, PNorm = 55.2319, GNorm = 0.7960, lr_0 = 6.4667e-04
Loss = 1.2133e-01, PNorm = 55.2492, GNorm = 0.8828, lr_0 = 6.4623e-04
Loss = 1.0849e-01, PNorm = 55.2624, GNorm = 0.7453, lr_0 = 6.4579e-04
Loss = 1.1582e-01, PNorm = 55.2793, GNorm = 1.3848, lr_0 = 6.4534e-04
Loss = 1.1307e-01, PNorm = 55.2926, GNorm = 0.7240, lr_0 = 6.4490e-04
Loss = 1.3564e-01, PNorm = 55.3138, GNorm = 1.1347, lr_0 = 6.4446e-04
Loss = 1.3037e-01, PNorm = 55.3350, GNorm = 0.5071, lr_0 = 6.4402e-04
Loss = 1.1545e-01, PNorm = 55.3499, GNorm = 0.9226, lr_0 = 6.4358e-04
Loss = 1.0010e-01, PNorm = 55.3627, GNorm = 0.8102, lr_0 = 6.4314e-04
Loss = 1.1737e-01, PNorm = 55.3734, GNorm = 1.0705, lr_0 = 6.4270e-04
Loss = 1.1603e-01, PNorm = 55.3830, GNorm = 1.1735, lr_0 = 6.4226e-04
Loss = 1.2217e-01, PNorm = 55.3968, GNorm = 0.7507, lr_0 = 6.4182e-04
Loss = 1.2399e-01, PNorm = 55.4115, GNorm = 1.2852, lr_0 = 6.4138e-04
Loss = 1.1827e-01, PNorm = 55.4293, GNorm = 0.8377, lr_0 = 6.4094e-04
Loss = 1.1764e-01, PNorm = 55.4498, GNorm = 1.9693, lr_0 = 6.4050e-04
Loss = 1.4535e-01, PNorm = 55.4744, GNorm = 1.6137, lr_0 = 6.4006e-04
Loss = 1.3942e-01, PNorm = 55.5020, GNorm = 0.5814, lr_0 = 6.3962e-04
Loss = 1.4310e-01, PNorm = 55.5288, GNorm = 1.0078, lr_0 = 6.3918e-04
Loss = 1.2087e-01, PNorm = 55.5523, GNorm = 0.7423, lr_0 = 6.3874e-04
Loss = 1.1880e-01, PNorm = 55.5675, GNorm = 0.8506, lr_0 = 6.3831e-04
Loss = 1.2746e-01, PNorm = 55.5807, GNorm = 1.7403, lr_0 = 6.3787e-04
Loss = 1.1625e-01, PNorm = 55.5982, GNorm = 0.7167, lr_0 = 6.3743e-04
Loss = 1.1433e-01, PNorm = 55.6108, GNorm = 1.2361, lr_0 = 6.3700e-04
Loss = 1.1990e-01, PNorm = 55.6247, GNorm = 0.5813, lr_0 = 6.3656e-04
Loss = 1.2586e-01, PNorm = 55.6339, GNorm = 0.6423, lr_0 = 6.3612e-04
Loss = 1.2804e-01, PNorm = 55.6516, GNorm = 0.6674, lr_0 = 6.3569e-04
Loss = 1.2101e-01, PNorm = 55.6737, GNorm = 0.6140, lr_0 = 6.3525e-04
Loss = 1.1465e-01, PNorm = 55.6936, GNorm = 0.8766, lr_0 = 6.3482e-04
Loss = 1.3177e-01, PNorm = 55.7007, GNorm = 0.7223, lr_0 = 6.3438e-04
Loss = 1.2028e-01, PNorm = 55.7227, GNorm = 0.8300, lr_0 = 6.3395e-04
Loss = 1.3247e-01, PNorm = 55.7532, GNorm = 0.8721, lr_0 = 6.3351e-04
Loss = 1.1761e-01, PNorm = 55.7729, GNorm = 0.8043, lr_0 = 6.3308e-04
Loss = 1.0913e-01, PNorm = 55.7803, GNorm = 0.8521, lr_0 = 6.3265e-04
Loss = 1.0999e-01, PNorm = 55.7923, GNorm = 0.7133, lr_0 = 6.3221e-04
Loss = 1.0740e-01, PNorm = 55.8067, GNorm = 0.9499, lr_0 = 6.3178e-04
Loss = 1.1159e-01, PNorm = 55.8254, GNorm = 0.7203, lr_0 = 6.3135e-04
Loss = 1.1886e-01, PNorm = 55.8482, GNorm = 0.5503, lr_0 = 6.3091e-04
Loss = 1.2509e-01, PNorm = 55.8671, GNorm = 0.7682, lr_0 = 6.3048e-04
Loss = 1.1803e-01, PNorm = 55.8860, GNorm = 1.0234, lr_0 = 6.3005e-04
Loss = 1.2189e-01, PNorm = 55.9028, GNorm = 0.7747, lr_0 = 6.2962e-04
Loss = 1.2167e-01, PNorm = 55.9231, GNorm = 1.1726, lr_0 = 6.2919e-04
Loss = 9.1643e-02, PNorm = 55.9361, GNorm = 0.6717, lr_0 = 6.2876e-04
Loss = 1.1848e-01, PNorm = 55.9484, GNorm = 0.9378, lr_0 = 6.2833e-04
Loss = 1.0956e-01, PNorm = 55.9664, GNorm = 0.7083, lr_0 = 6.2789e-04
Loss = 1.2584e-01, PNorm = 55.9795, GNorm = 0.7783, lr_0 = 6.2746e-04
Loss = 1.3162e-01, PNorm = 55.9918, GNorm = 1.1634, lr_0 = 6.2703e-04
Loss = 1.0283e-01, PNorm = 56.0094, GNorm = 1.1694, lr_0 = 6.2661e-04
Loss = 1.1689e-01, PNorm = 56.0245, GNorm = 0.9047, lr_0 = 6.2618e-04
Loss = 1.1416e-01, PNorm = 56.0364, GNorm = 1.4606, lr_0 = 6.2575e-04
Loss = 1.1772e-01, PNorm = 56.0455, GNorm = 0.6704, lr_0 = 6.2532e-04
Loss = 1.3821e-01, PNorm = 56.0593, GNorm = 0.7727, lr_0 = 6.2489e-04
Loss = 1.1033e-01, PNorm = 56.0781, GNorm = 0.7199, lr_0 = 6.2446e-04
Loss = 1.2272e-01, PNorm = 56.0966, GNorm = 0.7075, lr_0 = 6.2403e-04
Loss = 1.1299e-01, PNorm = 56.1114, GNorm = 0.6761, lr_0 = 6.2361e-04
Loss = 1.0919e-01, PNorm = 56.1298, GNorm = 0.7979, lr_0 = 6.2318e-04
Loss = 1.2717e-01, PNorm = 56.1443, GNorm = 1.7724, lr_0 = 6.2275e-04
Loss = 1.2945e-01, PNorm = 56.1570, GNorm = 1.3856, lr_0 = 6.2233e-04
Loss = 1.0563e-01, PNorm = 56.1734, GNorm = 2.1867, lr_0 = 6.2190e-04
Loss = 1.2400e-01, PNorm = 56.1879, GNorm = 0.6071, lr_0 = 6.2147e-04
Loss = 1.0524e-01, PNorm = 56.2068, GNorm = 0.7771, lr_0 = 6.2105e-04
Loss = 1.0671e-01, PNorm = 56.2233, GNorm = 1.1458, lr_0 = 6.2062e-04
Loss = 1.2122e-01, PNorm = 56.2374, GNorm = 0.5941, lr_0 = 6.2020e-04
Loss = 1.0782e-01, PNorm = 56.2526, GNorm = 0.6104, lr_0 = 6.1977e-04
Loss = 1.0298e-01, PNorm = 56.2675, GNorm = 0.5538, lr_0 = 6.1935e-04
Loss = 1.0382e-01, PNorm = 56.2778, GNorm = 0.5907, lr_0 = 6.1892e-04
Loss = 1.1136e-01, PNorm = 56.2909, GNorm = 1.1463, lr_0 = 6.1850e-04
Loss = 1.2067e-01, PNorm = 56.2970, GNorm = 0.9271, lr_0 = 6.1808e-04
Loss = 1.1307e-01, PNorm = 56.3133, GNorm = 1.0974, lr_0 = 6.1765e-04
Loss = 1.2896e-01, PNorm = 56.3329, GNorm = 1.2816, lr_0 = 6.1723e-04
Loss = 1.1807e-01, PNorm = 56.3459, GNorm = 0.8946, lr_0 = 6.1681e-04
Loss = 1.1631e-01, PNorm = 56.3594, GNorm = 0.5463, lr_0 = 6.1638e-04
Loss = 1.1868e-01, PNorm = 56.3782, GNorm = 0.7177, lr_0 = 6.1596e-04
Loss = 1.3566e-01, PNorm = 56.3968, GNorm = 0.7899, lr_0 = 6.1554e-04
Loss = 1.1943e-01, PNorm = 56.4060, GNorm = 1.1909, lr_0 = 6.1512e-04
Loss = 1.0793e-01, PNorm = 56.4144, GNorm = 0.7080, lr_0 = 6.1470e-04
Loss = 9.9083e-02, PNorm = 56.4300, GNorm = 0.5998, lr_0 = 6.1428e-04
Loss = 1.1696e-01, PNorm = 56.4451, GNorm = 0.6727, lr_0 = 6.1385e-04
Loss = 1.0185e-01, PNorm = 56.4555, GNorm = 0.4859, lr_0 = 6.1343e-04
Loss = 1.1085e-01, PNorm = 56.4655, GNorm = 1.7205, lr_0 = 6.1301e-04
Loss = 1.1134e-01, PNorm = 56.4725, GNorm = 0.7651, lr_0 = 6.1259e-04
Loss = 1.1398e-01, PNorm = 56.4852, GNorm = 1.1308, lr_0 = 6.1217e-04
Loss = 1.2090e-01, PNorm = 56.5027, GNorm = 1.2832, lr_0 = 6.1175e-04
Loss = 1.2122e-01, PNorm = 56.5193, GNorm = 1.4540, lr_0 = 6.1134e-04
Loss = 1.2354e-01, PNorm = 56.5321, GNorm = 0.7863, lr_0 = 6.1092e-04
Loss = 1.1643e-01, PNorm = 56.5440, GNorm = 1.6055, lr_0 = 6.1050e-04
Validation mae = 0.414407
Epoch 8
Loss = 1.0361e-01, PNorm = 56.5554, GNorm = 0.6260, lr_0 = 6.1008e-04
Loss = 1.0266e-01, PNorm = 56.5685, GNorm = 0.6189, lr_0 = 6.0966e-04
Loss = 1.0135e-01, PNorm = 56.5866, GNorm = 0.8475, lr_0 = 6.0924e-04
Loss = 9.9805e-02, PNorm = 56.6020, GNorm = 0.7873, lr_0 = 6.0883e-04
Loss = 9.4149e-02, PNorm = 56.6185, GNorm = 0.9754, lr_0 = 6.0841e-04
Loss = 1.1005e-01, PNorm = 56.6335, GNorm = 1.5548, lr_0 = 6.0799e-04
Loss = 8.7995e-02, PNorm = 56.6421, GNorm = 1.0603, lr_0 = 6.0758e-04
Loss = 1.1451e-01, PNorm = 56.6522, GNorm = 1.9577, lr_0 = 6.0716e-04
Loss = 1.2115e-01, PNorm = 56.6686, GNorm = 1.2072, lr_0 = 6.0674e-04
Loss = 1.0870e-01, PNorm = 56.6870, GNorm = 0.7139, lr_0 = 6.0633e-04
Loss = 1.0016e-01, PNorm = 56.7022, GNorm = 0.3738, lr_0 = 6.0591e-04
Loss = 1.1961e-01, PNorm = 56.7165, GNorm = 0.6838, lr_0 = 6.0550e-04
Loss = 1.0935e-01, PNorm = 56.7356, GNorm = 0.6021, lr_0 = 6.0508e-04
Loss = 9.3493e-02, PNorm = 56.7492, GNorm = 1.4696, lr_0 = 6.0467e-04
Loss = 1.0470e-01, PNorm = 56.7647, GNorm = 1.2204, lr_0 = 6.0425e-04
Loss = 1.0142e-01, PNorm = 56.7781, GNorm = 1.0436, lr_0 = 6.0384e-04
Loss = 1.1102e-01, PNorm = 56.7906, GNorm = 0.9629, lr_0 = 6.0343e-04
Loss = 9.4019e-02, PNorm = 56.8089, GNorm = 1.5039, lr_0 = 6.0301e-04
Loss = 1.0752e-01, PNorm = 56.8261, GNorm = 1.0459, lr_0 = 6.0260e-04
Loss = 1.0804e-01, PNorm = 56.8428, GNorm = 0.7204, lr_0 = 6.0219e-04
Loss = 1.0864e-01, PNorm = 56.8556, GNorm = 0.6764, lr_0 = 6.0178e-04
Loss = 9.9434e-02, PNorm = 56.8711, GNorm = 0.7763, lr_0 = 6.0136e-04
Loss = 1.0334e-01, PNorm = 56.8876, GNorm = 0.5585, lr_0 = 6.0095e-04
Loss = 1.1119e-01, PNorm = 56.9009, GNorm = 1.1874, lr_0 = 6.0054e-04
Loss = 1.0473e-01, PNorm = 56.9162, GNorm = 0.8168, lr_0 = 6.0013e-04
Loss = 1.0410e-01, PNorm = 56.9291, GNorm = 0.4918, lr_0 = 5.9972e-04
Loss = 1.1553e-01, PNorm = 56.9521, GNorm = 1.4770, lr_0 = 5.9931e-04
Loss = 1.0434e-01, PNorm = 56.9757, GNorm = 0.8205, lr_0 = 5.9890e-04
Loss = 1.1482e-01, PNorm = 56.9876, GNorm = 1.3395, lr_0 = 5.9849e-04
Loss = 1.0953e-01, PNorm = 56.9996, GNorm = 0.6281, lr_0 = 5.9808e-04
Loss = 9.7569e-02, PNorm = 57.0095, GNorm = 0.5361, lr_0 = 5.9767e-04
Loss = 9.8674e-02, PNorm = 57.0235, GNorm = 0.7940, lr_0 = 5.9726e-04
Loss = 1.0961e-01, PNorm = 57.0406, GNorm = 0.5536, lr_0 = 5.9685e-04
Loss = 9.4530e-02, PNorm = 57.0612, GNorm = 0.7488, lr_0 = 5.9644e-04
Loss = 1.0744e-01, PNorm = 57.0741, GNorm = 0.6864, lr_0 = 5.9603e-04
Loss = 1.1286e-01, PNorm = 57.0892, GNorm = 0.7360, lr_0 = 5.9562e-04
Loss = 1.0983e-01, PNorm = 57.1041, GNorm = 1.5905, lr_0 = 5.9521e-04
Loss = 1.2169e-01, PNorm = 57.1255, GNorm = 0.9442, lr_0 = 5.9481e-04
Loss = 1.2325e-01, PNorm = 57.1452, GNorm = 0.6395, lr_0 = 5.9440e-04
Loss = 1.2408e-01, PNorm = 57.1649, GNorm = 0.6554, lr_0 = 5.9399e-04
Loss = 1.0365e-01, PNorm = 57.1826, GNorm = 0.9591, lr_0 = 5.9358e-04
Loss = 1.1274e-01, PNorm = 57.1962, GNorm = 0.7410, lr_0 = 5.9318e-04
Loss = 9.4312e-02, PNorm = 57.2108, GNorm = 0.5233, lr_0 = 5.9277e-04
Loss = 1.0559e-01, PNorm = 57.2242, GNorm = 0.7248, lr_0 = 5.9236e-04
Loss = 9.8759e-02, PNorm = 57.2354, GNorm = 0.7473, lr_0 = 5.9196e-04
Loss = 8.4813e-02, PNorm = 57.2517, GNorm = 0.8672, lr_0 = 5.9155e-04
Loss = 9.9076e-02, PNorm = 57.2695, GNorm = 0.5867, lr_0 = 5.9115e-04
Loss = 1.1442e-01, PNorm = 57.2796, GNorm = 1.4885, lr_0 = 5.9074e-04
Loss = 9.7786e-02, PNorm = 57.2917, GNorm = 0.8337, lr_0 = 5.9034e-04
Loss = 1.1473e-01, PNorm = 57.3076, GNorm = 1.4045, lr_0 = 5.8993e-04
Loss = 1.1061e-01, PNorm = 57.3247, GNorm = 1.9440, lr_0 = 5.8953e-04
Loss = 1.1780e-01, PNorm = 57.3422, GNorm = 0.7532, lr_0 = 5.8913e-04
Loss = 1.1308e-01, PNorm = 57.3622, GNorm = 1.0234, lr_0 = 5.8872e-04
Loss = 1.1922e-01, PNorm = 57.3714, GNorm = 0.5434, lr_0 = 5.8832e-04
Loss = 9.9439e-02, PNorm = 57.3793, GNorm = 0.6863, lr_0 = 5.8792e-04
Loss = 1.1575e-01, PNorm = 57.3947, GNorm = 0.6073, lr_0 = 5.8751e-04
Loss = 9.7174e-02, PNorm = 57.4093, GNorm = 0.8543, lr_0 = 5.8711e-04
Loss = 1.1121e-01, PNorm = 57.4206, GNorm = 1.0198, lr_0 = 5.8671e-04
Loss = 1.1717e-01, PNorm = 57.4325, GNorm = 1.8856, lr_0 = 5.8631e-04
Loss = 1.2814e-01, PNorm = 57.4453, GNorm = 0.8667, lr_0 = 5.8591e-04
Loss = 1.1111e-01, PNorm = 57.4589, GNorm = 0.9177, lr_0 = 5.8550e-04
Loss = 9.8686e-02, PNorm = 57.4755, GNorm = 0.9167, lr_0 = 5.8510e-04
Loss = 1.0681e-01, PNorm = 57.4957, GNorm = 0.9369, lr_0 = 5.8470e-04
Loss = 1.0655e-01, PNorm = 57.5145, GNorm = 0.7017, lr_0 = 5.8430e-04
Loss = 1.0477e-01, PNorm = 57.5291, GNorm = 0.8655, lr_0 = 5.8390e-04
Loss = 1.1651e-01, PNorm = 57.5466, GNorm = 0.5441, lr_0 = 5.8350e-04
Loss = 1.1378e-01, PNorm = 57.5611, GNorm = 1.2921, lr_0 = 5.8310e-04
Loss = 1.1815e-01, PNorm = 57.5747, GNorm = 1.6469, lr_0 = 5.8270e-04
Loss = 1.1586e-01, PNorm = 57.5870, GNorm = 0.7339, lr_0 = 5.8230e-04
Loss = 1.1030e-01, PNorm = 57.6044, GNorm = 0.5981, lr_0 = 5.8190e-04
Loss = 1.0210e-01, PNorm = 57.6183, GNorm = 0.6162, lr_0 = 5.8151e-04
Loss = 1.1449e-01, PNorm = 57.6329, GNorm = 1.3759, lr_0 = 5.8111e-04
Loss = 1.1186e-01, PNorm = 57.6509, GNorm = 1.3836, lr_0 = 5.8071e-04
Loss = 1.1604e-01, PNorm = 57.6680, GNorm = 0.7849, lr_0 = 5.8031e-04
Loss = 1.0190e-01, PNorm = 57.6849, GNorm = 0.7492, lr_0 = 5.7991e-04
Loss = 1.0840e-01, PNorm = 57.6986, GNorm = 0.7383, lr_0 = 5.7952e-04
Loss = 1.1611e-01, PNorm = 57.7101, GNorm = 1.0488, lr_0 = 5.7912e-04
Loss = 1.0952e-01, PNorm = 57.7255, GNorm = 0.7686, lr_0 = 5.7872e-04
Loss = 1.0157e-01, PNorm = 57.7375, GNorm = 1.1214, lr_0 = 5.7833e-04
Loss = 9.9996e-02, PNorm = 57.7499, GNorm = 0.5903, lr_0 = 5.7793e-04
Loss = 1.2834e-01, PNorm = 57.7635, GNorm = 0.7056, lr_0 = 5.7753e-04
Loss = 1.1502e-01, PNorm = 57.7808, GNorm = 1.2837, lr_0 = 5.7714e-04
Loss = 1.0673e-01, PNorm = 57.8002, GNorm = 1.2227, lr_0 = 5.7674e-04
Loss = 1.1022e-01, PNorm = 57.8220, GNorm = 0.4287, lr_0 = 5.7635e-04
Loss = 1.0898e-01, PNorm = 57.8382, GNorm = 0.6902, lr_0 = 5.7595e-04
Loss = 1.1018e-01, PNorm = 57.8484, GNorm = 0.7129, lr_0 = 5.7556e-04
Loss = 1.1712e-01, PNorm = 57.8581, GNorm = 0.8404, lr_0 = 5.7516e-04
Loss = 1.1871e-01, PNorm = 57.8743, GNorm = 2.0326, lr_0 = 5.7477e-04
Loss = 1.1655e-01, PNorm = 57.8897, GNorm = 0.7688, lr_0 = 5.7438e-04
Loss = 1.0652e-01, PNorm = 57.9073, GNorm = 0.6926, lr_0 = 5.7398e-04
Loss = 1.2512e-01, PNorm = 57.9233, GNorm = 1.2849, lr_0 = 5.7359e-04
Loss = 1.0127e-01, PNorm = 57.9378, GNorm = 1.0391, lr_0 = 5.7320e-04
Loss = 1.0652e-01, PNorm = 57.9492, GNorm = 0.6046, lr_0 = 5.7280e-04
Loss = 1.1492e-01, PNorm = 57.9617, GNorm = 0.6278, lr_0 = 5.7241e-04
Loss = 1.0901e-01, PNorm = 57.9748, GNorm = 1.1562, lr_0 = 5.7202e-04
Loss = 1.0368e-01, PNorm = 57.9892, GNorm = 1.1004, lr_0 = 5.7163e-04
Loss = 1.0171e-01, PNorm = 58.0063, GNorm = 0.5627, lr_0 = 5.7124e-04
Loss = 1.1649e-01, PNorm = 58.0185, GNorm = 0.7143, lr_0 = 5.7084e-04
Loss = 1.2445e-01, PNorm = 58.0372, GNorm = 0.9211, lr_0 = 5.7045e-04
Loss = 1.1642e-01, PNorm = 58.0553, GNorm = 0.6637, lr_0 = 5.7006e-04
Loss = 9.5360e-02, PNorm = 58.0652, GNorm = 0.4284, lr_0 = 5.6967e-04
Loss = 9.9598e-02, PNorm = 58.0807, GNorm = 0.8066, lr_0 = 5.6928e-04
Loss = 1.1337e-01, PNorm = 58.0950, GNorm = 0.5366, lr_0 = 5.6889e-04
Loss = 9.4057e-02, PNorm = 58.1102, GNorm = 0.9505, lr_0 = 5.6850e-04
Loss = 1.0332e-01, PNorm = 58.1299, GNorm = 0.5343, lr_0 = 5.6811e-04
Loss = 1.0247e-01, PNorm = 58.1416, GNorm = 0.5832, lr_0 = 5.6772e-04
Loss = 1.0710e-01, PNorm = 58.1477, GNorm = 1.2267, lr_0 = 5.6733e-04
Loss = 1.1126e-01, PNorm = 58.1599, GNorm = 1.1532, lr_0 = 5.6695e-04
Loss = 1.2456e-01, PNorm = 58.1722, GNorm = 0.8236, lr_0 = 5.6656e-04
Loss = 1.3787e-01, PNorm = 58.1865, GNorm = 0.9231, lr_0 = 5.6617e-04
Loss = 1.1486e-01, PNorm = 58.2004, GNorm = 1.5586, lr_0 = 5.6578e-04
Loss = 1.1384e-01, PNorm = 58.2186, GNorm = 0.8167, lr_0 = 5.6539e-04
Loss = 1.0164e-01, PNorm = 58.2346, GNorm = 0.9222, lr_0 = 5.6501e-04
Loss = 9.9009e-02, PNorm = 58.2514, GNorm = 1.1037, lr_0 = 5.6462e-04
Loss = 1.1549e-01, PNorm = 58.2656, GNorm = 1.2569, lr_0 = 5.6423e-04
Loss = 1.0479e-01, PNorm = 58.2785, GNorm = 0.9164, lr_0 = 5.6385e-04
Loss = 1.1081e-01, PNorm = 58.2920, GNorm = 1.1199, lr_0 = 5.6346e-04
Loss = 1.0703e-01, PNorm = 58.3018, GNorm = 0.7418, lr_0 = 5.6307e-04
Loss = 1.1356e-01, PNorm = 58.3157, GNorm = 1.1994, lr_0 = 5.6269e-04
Loss = 1.0086e-01, PNorm = 58.3254, GNorm = 0.6258, lr_0 = 5.6230e-04
Validation mae = 0.436440
Epoch 9
Loss = 1.0091e-01, PNorm = 58.3386, GNorm = 0.6934, lr_0 = 5.6192e-04
Loss = 1.0499e-01, PNorm = 58.3543, GNorm = 0.5724, lr_0 = 5.6153e-04
Loss = 9.9529e-02, PNorm = 58.3722, GNorm = 0.8459, lr_0 = 5.6115e-04
Loss = 9.1278e-02, PNorm = 58.3933, GNorm = 0.6253, lr_0 = 5.6076e-04
Loss = 8.5281e-02, PNorm = 58.4127, GNorm = 0.9092, lr_0 = 5.6038e-04
Loss = 9.5702e-02, PNorm = 58.4281, GNorm = 0.5818, lr_0 = 5.6000e-04
Loss = 1.0174e-01, PNorm = 58.4411, GNorm = 0.5339, lr_0 = 5.5961e-04
Loss = 9.1332e-02, PNorm = 58.4548, GNorm = 0.7781, lr_0 = 5.5923e-04
Loss = 1.0733e-01, PNorm = 58.4726, GNorm = 1.3841, lr_0 = 5.5885e-04
Loss = 1.0743e-01, PNorm = 58.4946, GNorm = 0.8942, lr_0 = 5.5846e-04
Loss = 1.0895e-01, PNorm = 58.5078, GNorm = 0.8774, lr_0 = 5.5808e-04
Loss = 9.9765e-02, PNorm = 58.5237, GNorm = 0.6275, lr_0 = 5.5770e-04
Loss = 9.0720e-02, PNorm = 58.5390, GNorm = 1.0049, lr_0 = 5.5732e-04
Loss = 1.0345e-01, PNorm = 58.5493, GNorm = 0.6135, lr_0 = 5.5693e-04
Loss = 1.0184e-01, PNorm = 58.5684, GNorm = 0.7452, lr_0 = 5.5655e-04
Loss = 1.0348e-01, PNorm = 58.5865, GNorm = 0.5242, lr_0 = 5.5617e-04
Loss = 8.7871e-02, PNorm = 58.6016, GNorm = 0.6252, lr_0 = 5.5579e-04
Loss = 9.1386e-02, PNorm = 58.6207, GNorm = 1.4251, lr_0 = 5.5541e-04
Loss = 1.0208e-01, PNorm = 58.6369, GNorm = 1.8759, lr_0 = 5.5503e-04
Loss = 9.7662e-02, PNorm = 58.6500, GNorm = 0.8091, lr_0 = 5.5465e-04
Loss = 1.0298e-01, PNorm = 58.6608, GNorm = 0.6693, lr_0 = 5.5427e-04
Loss = 1.0241e-01, PNorm = 58.6723, GNorm = 1.2963, lr_0 = 5.5389e-04
Loss = 9.8866e-02, PNorm = 58.6846, GNorm = 0.5968, lr_0 = 5.5351e-04
Loss = 1.0583e-01, PNorm = 58.6987, GNorm = 1.1157, lr_0 = 5.5313e-04
Loss = 8.8752e-02, PNorm = 58.7143, GNorm = 0.9836, lr_0 = 5.5275e-04
Loss = 1.0612e-01, PNorm = 58.7227, GNorm = 0.7418, lr_0 = 5.5237e-04
Loss = 9.4949e-02, PNorm = 58.7334, GNorm = 0.9409, lr_0 = 5.5199e-04
Loss = 8.7925e-02, PNorm = 58.7451, GNorm = 0.8252, lr_0 = 5.5162e-04
Loss = 1.0317e-01, PNorm = 58.7612, GNorm = 0.8564, lr_0 = 5.5124e-04
Loss = 1.0071e-01, PNorm = 58.7717, GNorm = 0.5980, lr_0 = 5.5086e-04
Loss = 8.3138e-02, PNorm = 58.7826, GNorm = 0.5095, lr_0 = 5.5048e-04
Loss = 1.0150e-01, PNorm = 58.7916, GNorm = 0.7553, lr_0 = 5.5011e-04
Loss = 8.8474e-02, PNorm = 58.8029, GNorm = 0.5629, lr_0 = 5.4973e-04
Loss = 8.6389e-02, PNorm = 58.8116, GNorm = 0.6350, lr_0 = 5.4935e-04
Loss = 9.2981e-02, PNorm = 58.8254, GNorm = 0.6852, lr_0 = 5.4898e-04
Loss = 1.0595e-01, PNorm = 58.8417, GNorm = 1.5073, lr_0 = 5.4860e-04
Loss = 8.8388e-02, PNorm = 58.8604, GNorm = 0.8322, lr_0 = 5.4822e-04
Loss = 1.2623e-01, PNorm = 58.8767, GNorm = 0.6515, lr_0 = 5.4785e-04
Loss = 9.9719e-02, PNorm = 58.8870, GNorm = 0.6458, lr_0 = 5.4747e-04
Loss = 8.1827e-02, PNorm = 58.8964, GNorm = 0.8418, lr_0 = 5.4710e-04
Loss = 1.1124e-01, PNorm = 58.9071, GNorm = 0.6075, lr_0 = 5.4672e-04
Loss = 1.0819e-01, PNorm = 58.9251, GNorm = 0.6724, lr_0 = 5.4635e-04
Loss = 9.8261e-02, PNorm = 58.9406, GNorm = 0.4664, lr_0 = 5.4597e-04
Loss = 1.0668e-01, PNorm = 58.9614, GNorm = 0.5999, lr_0 = 5.4560e-04
Loss = 8.3993e-02, PNorm = 58.9771, GNorm = 0.5219, lr_0 = 5.4523e-04
Loss = 9.6780e-02, PNorm = 58.9929, GNorm = 0.6212, lr_0 = 5.4485e-04
Loss = 9.1677e-02, PNorm = 59.0061, GNorm = 0.4466, lr_0 = 5.4448e-04
Loss = 9.6246e-02, PNorm = 59.0191, GNorm = 0.7342, lr_0 = 5.4411e-04
Loss = 1.0683e-01, PNorm = 59.0293, GNorm = 1.1261, lr_0 = 5.4373e-04
Loss = 1.0483e-01, PNorm = 59.0417, GNorm = 0.9099, lr_0 = 5.4336e-04
Loss = 1.0065e-01, PNorm = 59.0563, GNorm = 0.7266, lr_0 = 5.4299e-04
Loss = 8.8780e-02, PNorm = 59.0713, GNorm = 0.5412, lr_0 = 5.4262e-04
Loss = 9.9982e-02, PNorm = 59.0827, GNorm = 0.6030, lr_0 = 5.4225e-04
Loss = 1.0476e-01, PNorm = 59.1004, GNorm = 0.5597, lr_0 = 5.4187e-04
Loss = 1.2620e-01, PNorm = 59.1172, GNorm = 0.7368, lr_0 = 5.4150e-04
Loss = 1.0192e-01, PNorm = 59.1238, GNorm = 1.3964, lr_0 = 5.4113e-04
Loss = 9.5220e-02, PNorm = 59.1371, GNorm = 1.0108, lr_0 = 5.4076e-04
Loss = 9.6780e-02, PNorm = 59.1509, GNorm = 0.6445, lr_0 = 5.4039e-04
Loss = 9.2748e-02, PNorm = 59.1635, GNorm = 0.4062, lr_0 = 5.4002e-04
Loss = 1.0339e-01, PNorm = 59.1752, GNorm = 0.6426, lr_0 = 5.3965e-04
Loss = 9.5063e-02, PNorm = 59.1857, GNorm = 0.7310, lr_0 = 5.3928e-04
Loss = 8.8703e-02, PNorm = 59.2004, GNorm = 0.6190, lr_0 = 5.3891e-04
Loss = 8.5940e-02, PNorm = 59.2146, GNorm = 0.8013, lr_0 = 5.3854e-04
Loss = 1.1469e-01, PNorm = 59.2291, GNorm = 0.7396, lr_0 = 5.3817e-04
Loss = 1.0826e-01, PNorm = 59.2408, GNorm = 1.3672, lr_0 = 5.3781e-04
Loss = 1.0152e-01, PNorm = 59.2528, GNorm = 1.3860, lr_0 = 5.3744e-04
Loss = 9.0195e-02, PNorm = 59.2687, GNorm = 0.5088, lr_0 = 5.3707e-04
Loss = 9.0577e-02, PNorm = 59.2845, GNorm = 0.8818, lr_0 = 5.3670e-04
Loss = 1.0589e-01, PNorm = 59.2970, GNorm = 0.9685, lr_0 = 5.3633e-04
Loss = 9.9867e-02, PNorm = 59.3068, GNorm = 0.5465, lr_0 = 5.3597e-04
Loss = 9.9420e-02, PNorm = 59.3184, GNorm = 0.5511, lr_0 = 5.3560e-04
Loss = 1.1717e-01, PNorm = 59.3294, GNorm = 1.2301, lr_0 = 5.3523e-04
Loss = 1.0086e-01, PNorm = 59.3427, GNorm = 1.1563, lr_0 = 5.3486e-04
Loss = 9.2483e-02, PNorm = 59.3618, GNorm = 0.5135, lr_0 = 5.3450e-04
Loss = 1.0475e-01, PNorm = 59.3752, GNorm = 0.4842, lr_0 = 5.3413e-04
Loss = 1.0451e-01, PNorm = 59.3818, GNorm = 0.5168, lr_0 = 5.3377e-04
Loss = 1.1204e-01, PNorm = 59.3930, GNorm = 0.6245, lr_0 = 5.3340e-04
Loss = 9.6364e-02, PNorm = 59.4063, GNorm = 1.0568, lr_0 = 5.3304e-04
Loss = 1.0594e-01, PNorm = 59.4218, GNorm = 0.7774, lr_0 = 5.3267e-04
Loss = 1.0315e-01, PNorm = 59.4316, GNorm = 0.5561, lr_0 = 5.3231e-04
Loss = 1.0748e-01, PNorm = 59.4450, GNorm = 0.7345, lr_0 = 5.3194e-04
Loss = 1.0235e-01, PNorm = 59.4592, GNorm = 1.0629, lr_0 = 5.3158e-04
Loss = 1.0922e-01, PNorm = 59.4699, GNorm = 1.5323, lr_0 = 5.3121e-04
Loss = 9.9179e-02, PNorm = 59.4847, GNorm = 0.8089, lr_0 = 5.3085e-04
Loss = 8.7520e-02, PNorm = 59.4965, GNorm = 0.4010, lr_0 = 5.3048e-04
Loss = 1.0909e-01, PNorm = 59.5065, GNorm = 0.7447, lr_0 = 5.3012e-04
Loss = 1.1461e-01, PNorm = 59.5205, GNorm = 0.7724, lr_0 = 5.2976e-04
Loss = 8.6882e-02, PNorm = 59.5333, GNorm = 1.1272, lr_0 = 5.2939e-04
Loss = 1.0856e-01, PNorm = 59.5494, GNorm = 0.7313, lr_0 = 5.2903e-04
Loss = 1.1404e-01, PNorm = 59.5656, GNorm = 1.4102, lr_0 = 5.2867e-04
Loss = 9.9791e-02, PNorm = 59.5773, GNorm = 0.6248, lr_0 = 5.2831e-04
Loss = 9.3040e-02, PNorm = 59.5872, GNorm = 0.6490, lr_0 = 5.2795e-04
Loss = 1.0401e-01, PNorm = 59.6002, GNorm = 0.7328, lr_0 = 5.2758e-04
Loss = 1.0737e-01, PNorm = 59.6162, GNorm = 0.6468, lr_0 = 5.2722e-04
Loss = 9.5732e-02, PNorm = 59.6304, GNorm = 0.8987, lr_0 = 5.2686e-04
Loss = 8.6055e-02, PNorm = 59.6398, GNorm = 0.8280, lr_0 = 5.2650e-04
Loss = 9.0438e-02, PNorm = 59.6455, GNorm = 0.6812, lr_0 = 5.2614e-04
Loss = 1.0434e-01, PNorm = 59.6560, GNorm = 1.4745, lr_0 = 5.2578e-04
Loss = 1.0683e-01, PNorm = 59.6661, GNorm = 0.9591, lr_0 = 5.2542e-04
Loss = 1.0009e-01, PNorm = 59.6744, GNorm = 1.9844, lr_0 = 5.2506e-04
Loss = 1.2490e-01, PNorm = 59.6893, GNorm = 1.0293, lr_0 = 5.2470e-04
Loss = 9.4134e-02, PNorm = 59.7029, GNorm = 0.7669, lr_0 = 5.2434e-04
Loss = 1.0191e-01, PNorm = 59.7167, GNorm = 0.5924, lr_0 = 5.2398e-04
Loss = 9.6543e-02, PNorm = 59.7303, GNorm = 0.9023, lr_0 = 5.2362e-04
Loss = 1.0198e-01, PNorm = 59.7408, GNorm = 0.7085, lr_0 = 5.2326e-04
Loss = 1.1092e-01, PNorm = 59.7533, GNorm = 0.8959, lr_0 = 5.2290e-04
Loss = 1.0767e-01, PNorm = 59.7679, GNorm = 0.7274, lr_0 = 5.2255e-04
Loss = 1.0077e-01, PNorm = 59.7797, GNorm = 0.9108, lr_0 = 5.2219e-04
Loss = 8.5000e-02, PNorm = 59.7926, GNorm = 0.5943, lr_0 = 5.2183e-04
Loss = 9.0865e-02, PNorm = 59.8051, GNorm = 0.6787, lr_0 = 5.2147e-04
Loss = 1.0125e-01, PNorm = 59.8164, GNorm = 1.3612, lr_0 = 5.2112e-04
Loss = 1.0468e-01, PNorm = 59.8250, GNorm = 1.0671, lr_0 = 5.2076e-04
Loss = 9.8128e-02, PNorm = 59.8394, GNorm = 0.5189, lr_0 = 5.2040e-04
Loss = 1.0143e-01, PNorm = 59.8490, GNorm = 0.9704, lr_0 = 5.2005e-04
Loss = 9.9463e-02, PNorm = 59.8541, GNorm = 0.8687, lr_0 = 5.1969e-04
Loss = 1.1237e-01, PNorm = 59.8654, GNorm = 0.7938, lr_0 = 5.1933e-04
Loss = 9.7994e-02, PNorm = 59.8778, GNorm = 1.0636, lr_0 = 5.1898e-04
Loss = 8.7505e-02, PNorm = 59.8895, GNorm = 1.0186, lr_0 = 5.1862e-04
Loss = 9.8157e-02, PNorm = 59.8964, GNorm = 0.6333, lr_0 = 5.1827e-04
Loss = 1.0711e-01, PNorm = 59.8995, GNorm = 0.8552, lr_0 = 5.1791e-04
Validation mae = 0.404197
Epoch 10
Loss = 8.5774e-02, PNorm = 59.9091, GNorm = 0.4940, lr_0 = 5.1756e-04
Loss = 8.5320e-02, PNorm = 59.9192, GNorm = 0.3963, lr_0 = 5.1720e-04
Loss = 7.8285e-02, PNorm = 59.9288, GNorm = 0.5573, lr_0 = 5.1685e-04
Loss = 8.9389e-02, PNorm = 59.9459, GNorm = 0.7097, lr_0 = 5.1649e-04
Loss = 8.6481e-02, PNorm = 59.9575, GNorm = 0.5675, lr_0 = 5.1614e-04
Loss = 8.5942e-02, PNorm = 59.9706, GNorm = 0.7348, lr_0 = 5.1579e-04
Loss = 1.1123e-01, PNorm = 59.9778, GNorm = 1.6115, lr_0 = 5.1543e-04
Loss = 1.0564e-01, PNorm = 59.9947, GNorm = 1.2480, lr_0 = 5.1508e-04
Loss = 9.5149e-02, PNorm = 60.0133, GNorm = 1.3405, lr_0 = 5.1473e-04
Loss = 8.5150e-02, PNorm = 60.0265, GNorm = 0.5721, lr_0 = 5.1437e-04
Loss = 8.2771e-02, PNorm = 60.0374, GNorm = 0.9629, lr_0 = 5.1402e-04
Loss = 9.4789e-02, PNorm = 60.0473, GNorm = 1.1808, lr_0 = 5.1367e-04
Loss = 9.3184e-02, PNorm = 60.0584, GNorm = 0.8220, lr_0 = 5.1332e-04
Loss = 7.6611e-02, PNorm = 60.0719, GNorm = 1.3272, lr_0 = 5.1297e-04
Loss = 8.2697e-02, PNorm = 60.0861, GNorm = 0.8503, lr_0 = 5.1262e-04
Loss = 9.5114e-02, PNorm = 60.0945, GNorm = 0.5634, lr_0 = 5.1226e-04
Loss = 9.0621e-02, PNorm = 60.1088, GNorm = 0.6856, lr_0 = 5.1191e-04
Loss = 8.7112e-02, PNorm = 60.1278, GNorm = 0.6045, lr_0 = 5.1156e-04
Loss = 8.2846e-02, PNorm = 60.1436, GNorm = 0.6592, lr_0 = 5.1121e-04
Loss = 9.6076e-02, PNorm = 60.1554, GNorm = 1.0389, lr_0 = 5.1086e-04
Loss = 9.8184e-02, PNorm = 60.1679, GNorm = 0.6820, lr_0 = 5.1051e-04
Loss = 8.6975e-02, PNorm = 60.1845, GNorm = 0.5539, lr_0 = 5.1016e-04
Loss = 8.2736e-02, PNorm = 60.2040, GNorm = 0.5124, lr_0 = 5.0981e-04
Loss = 9.5836e-02, PNorm = 60.2151, GNorm = 0.7183, lr_0 = 5.0946e-04
Loss = 9.6497e-02, PNorm = 60.2226, GNorm = 0.7539, lr_0 = 5.0911e-04
Loss = 1.0785e-01, PNorm = 60.2355, GNorm = 0.5664, lr_0 = 5.0877e-04
Loss = 9.2068e-02, PNorm = 60.2499, GNorm = 1.1011, lr_0 = 5.0842e-04
Loss = 9.6256e-02, PNorm = 60.2656, GNorm = 0.6233, lr_0 = 5.0807e-04
Loss = 9.3666e-02, PNorm = 60.2785, GNorm = 0.7628, lr_0 = 5.0772e-04
Loss = 1.1312e-01, PNorm = 60.2913, GNorm = 1.0791, lr_0 = 5.0737e-04
Loss = 1.0757e-01, PNorm = 60.3044, GNorm = 1.0012, lr_0 = 5.0703e-04
Loss = 1.0051e-01, PNorm = 60.3193, GNorm = 0.6797, lr_0 = 5.0668e-04
Loss = 9.4720e-02, PNorm = 60.3355, GNorm = 0.8099, lr_0 = 5.0633e-04
Loss = 9.3203e-02, PNorm = 60.3527, GNorm = 0.5955, lr_0 = 5.0598e-04
Loss = 8.7383e-02, PNorm = 60.3676, GNorm = 0.6537, lr_0 = 5.0564e-04
Loss = 9.6696e-02, PNorm = 60.3775, GNorm = 0.6622, lr_0 = 5.0529e-04
Loss = 1.1086e-01, PNorm = 60.3829, GNorm = 1.1720, lr_0 = 5.0494e-04
Loss = 9.5438e-02, PNorm = 60.3940, GNorm = 0.5753, lr_0 = 5.0460e-04
Loss = 9.0451e-02, PNorm = 60.4102, GNorm = 0.5722, lr_0 = 5.0425e-04
Loss = 9.1319e-02, PNorm = 60.4234, GNorm = 0.4889, lr_0 = 5.0391e-04
Loss = 8.9814e-02, PNorm = 60.4378, GNorm = 0.5703, lr_0 = 5.0356e-04
Loss = 9.6994e-02, PNorm = 60.4503, GNorm = 0.6959, lr_0 = 5.0322e-04
Loss = 9.4358e-02, PNorm = 60.4568, GNorm = 0.5875, lr_0 = 5.0287e-04
Loss = 9.1733e-02, PNorm = 60.4654, GNorm = 0.6353, lr_0 = 5.0253e-04
Loss = 8.1803e-02, PNorm = 60.4817, GNorm = 0.6573, lr_0 = 5.0218e-04
Loss = 1.1160e-01, PNorm = 60.4961, GNorm = 1.7187, lr_0 = 5.0184e-04
Loss = 9.5523e-02, PNorm = 60.5137, GNorm = 1.9820, lr_0 = 5.0150e-04
Loss = 1.0607e-01, PNorm = 60.5262, GNorm = 1.7708, lr_0 = 5.0115e-04
Loss = 9.7729e-02, PNorm = 60.5357, GNorm = 1.1042, lr_0 = 5.0081e-04
Loss = 9.8733e-02, PNorm = 60.5521, GNorm = 0.5234, lr_0 = 5.0047e-04
Loss = 8.7369e-02, PNorm = 60.5705, GNorm = 0.7449, lr_0 = 5.0012e-04
Loss = 9.4965e-02, PNorm = 60.5847, GNorm = 1.6403, lr_0 = 4.9978e-04
Loss = 8.7073e-02, PNorm = 60.6005, GNorm = 1.1802, lr_0 = 4.9944e-04
Loss = 8.7070e-02, PNorm = 60.6199, GNorm = 0.6034, lr_0 = 4.9910e-04
Loss = 9.8656e-02, PNorm = 60.6381, GNorm = 0.6089, lr_0 = 4.9875e-04
Loss = 8.1949e-02, PNorm = 60.6504, GNorm = 0.7812, lr_0 = 4.9841e-04
Loss = 7.6669e-02, PNorm = 60.6601, GNorm = 0.8078, lr_0 = 4.9807e-04
Loss = 8.2948e-02, PNorm = 60.6681, GNorm = 0.6656, lr_0 = 4.9773e-04
Loss = 1.0147e-01, PNorm = 60.6826, GNorm = 0.9379, lr_0 = 4.9739e-04
Loss = 9.9030e-02, PNorm = 60.6986, GNorm = 0.5938, lr_0 = 4.9705e-04
Loss = 9.9670e-02, PNorm = 60.7115, GNorm = 1.0609, lr_0 = 4.9671e-04
Loss = 8.0739e-02, PNorm = 60.7206, GNorm = 0.5521, lr_0 = 4.9637e-04
Loss = 8.0192e-02, PNorm = 60.7302, GNorm = 0.6056, lr_0 = 4.9603e-04
Loss = 9.7785e-02, PNorm = 60.7426, GNorm = 0.7040, lr_0 = 4.9569e-04
Loss = 9.3195e-02, PNorm = 60.7554, GNorm = 0.5990, lr_0 = 4.9535e-04
Loss = 8.5398e-02, PNorm = 60.7656, GNorm = 0.8003, lr_0 = 4.9501e-04
Loss = 9.7985e-02, PNorm = 60.7815, GNorm = 0.7572, lr_0 = 4.9467e-04
Loss = 9.3933e-02, PNorm = 60.7944, GNorm = 0.8514, lr_0 = 4.9433e-04
Loss = 9.5517e-02, PNorm = 60.8054, GNorm = 0.6004, lr_0 = 4.9399e-04
Loss = 1.0426e-01, PNorm = 60.8149, GNorm = 1.6471, lr_0 = 4.9365e-04
Loss = 9.2126e-02, PNorm = 60.8282, GNorm = 0.5181, lr_0 = 4.9332e-04
Loss = 8.0684e-02, PNorm = 60.8402, GNorm = 0.4035, lr_0 = 4.9298e-04
Loss = 9.5225e-02, PNorm = 60.8526, GNorm = 0.8448, lr_0 = 4.9264e-04
Loss = 9.2421e-02, PNorm = 60.8638, GNorm = 1.0219, lr_0 = 4.9230e-04
Loss = 8.5152e-02, PNorm = 60.8781, GNorm = 0.7534, lr_0 = 4.9197e-04
Loss = 8.6217e-02, PNorm = 60.8895, GNorm = 0.8933, lr_0 = 4.9163e-04
Loss = 9.3040e-02, PNorm = 60.8995, GNorm = 0.7921, lr_0 = 4.9129e-04
Loss = 1.1224e-01, PNorm = 60.9106, GNorm = 1.1425, lr_0 = 4.9095e-04
Loss = 9.1170e-02, PNorm = 60.9220, GNorm = 0.5818, lr_0 = 4.9062e-04
Loss = 9.5072e-02, PNorm = 60.9402, GNorm = 0.8900, lr_0 = 4.9028e-04
Loss = 9.3555e-02, PNorm = 60.9556, GNorm = 0.6198, lr_0 = 4.8995e-04
Loss = 9.3386e-02, PNorm = 60.9737, GNorm = 0.4558, lr_0 = 4.8961e-04
Loss = 9.2082e-02, PNorm = 60.9838, GNorm = 0.7050, lr_0 = 4.8928e-04
Loss = 1.0244e-01, PNorm = 60.9915, GNorm = 0.9492, lr_0 = 4.8894e-04
Loss = 8.1373e-02, PNorm = 61.0009, GNorm = 0.5470, lr_0 = 4.8861e-04
Loss = 9.3688e-02, PNorm = 61.0098, GNorm = 0.5245, lr_0 = 4.8827e-04
Loss = 1.0354e-01, PNorm = 61.0208, GNorm = 0.8106, lr_0 = 4.8794e-04
Loss = 8.2020e-02, PNorm = 61.0314, GNorm = 0.7208, lr_0 = 4.8760e-04
Loss = 9.0047e-02, PNorm = 61.0417, GNorm = 0.7062, lr_0 = 4.8727e-04
Loss = 8.2163e-02, PNorm = 61.0528, GNorm = 0.6916, lr_0 = 4.8693e-04
Loss = 1.0269e-01, PNorm = 61.0665, GNorm = 0.5753, lr_0 = 4.8660e-04
Loss = 9.9021e-02, PNorm = 61.0774, GNorm = 0.4757, lr_0 = 4.8627e-04
Loss = 1.1980e-01, PNorm = 61.0911, GNorm = 1.4455, lr_0 = 4.8593e-04
Loss = 1.0900e-01, PNorm = 61.1127, GNorm = 0.7396, lr_0 = 4.8560e-04
Loss = 8.2281e-02, PNorm = 61.1255, GNorm = 0.7911, lr_0 = 4.8527e-04
Loss = 9.4354e-02, PNorm = 61.1358, GNorm = 0.6315, lr_0 = 4.8494e-04
Loss = 7.7355e-02, PNorm = 61.1452, GNorm = 0.5375, lr_0 = 4.8460e-04
Loss = 1.0847e-01, PNorm = 61.1533, GNorm = 0.9331, lr_0 = 4.8427e-04
Loss = 1.0007e-01, PNorm = 61.1598, GNorm = 0.5812, lr_0 = 4.8394e-04
Loss = 9.2500e-02, PNorm = 61.1666, GNorm = 0.6386, lr_0 = 4.8361e-04
Loss = 8.9548e-02, PNorm = 61.1771, GNorm = 0.6126, lr_0 = 4.8328e-04
Loss = 1.1298e-01, PNorm = 61.1917, GNorm = 0.7551, lr_0 = 4.8295e-04
Loss = 9.4701e-02, PNorm = 61.2031, GNorm = 1.1196, lr_0 = 4.8262e-04
Loss = 9.4303e-02, PNorm = 61.2151, GNorm = 1.1029, lr_0 = 4.8228e-04
Loss = 7.8353e-02, PNorm = 61.2322, GNorm = 0.6398, lr_0 = 4.8195e-04
Loss = 9.4769e-02, PNorm = 61.2422, GNorm = 0.7805, lr_0 = 4.8162e-04
Loss = 8.4111e-02, PNorm = 61.2472, GNorm = 0.6606, lr_0 = 4.8129e-04
Loss = 1.0055e-01, PNorm = 61.2580, GNorm = 2.0050, lr_0 = 4.8096e-04
Loss = 1.0487e-01, PNorm = 61.2735, GNorm = 0.9868, lr_0 = 4.8064e-04
Loss = 9.4169e-02, PNorm = 61.2840, GNorm = 0.5441, lr_0 = 4.8031e-04
Loss = 8.5517e-02, PNorm = 61.2971, GNorm = 0.9646, lr_0 = 4.7998e-04
Loss = 9.8081e-02, PNorm = 61.3085, GNorm = 0.5752, lr_0 = 4.7965e-04
Loss = 8.3750e-02, PNorm = 61.3198, GNorm = 0.5593, lr_0 = 4.7932e-04
Loss = 9.8738e-02, PNorm = 61.3282, GNorm = 0.8895, lr_0 = 4.7899e-04
Loss = 1.1027e-01, PNorm = 61.3374, GNorm = 0.8377, lr_0 = 4.7866e-04
Loss = 1.0071e-01, PNorm = 61.3477, GNorm = 0.6248, lr_0 = 4.7833e-04
Loss = 8.3854e-02, PNorm = 61.3573, GNorm = 0.4543, lr_0 = 4.7801e-04
Loss = 8.6555e-02, PNorm = 61.3641, GNorm = 0.5834, lr_0 = 4.7768e-04
Loss = 8.4718e-02, PNorm = 61.3733, GNorm = 0.8011, lr_0 = 4.7735e-04
Loss = 1.0742e-01, PNorm = 61.3838, GNorm = 0.6660, lr_0 = 4.7703e-04
Validation mae = 0.399118
Epoch 11
Loss = 8.2185e-02, PNorm = 61.3964, GNorm = 0.6608, lr_0 = 4.7670e-04
Loss = 8.7757e-02, PNorm = 61.4104, GNorm = 0.6389, lr_0 = 4.7637e-04
Loss = 7.7628e-02, PNorm = 61.4198, GNorm = 0.5879, lr_0 = 4.7605e-04
Loss = 7.8175e-02, PNorm = 61.4320, GNorm = 0.9635, lr_0 = 4.7572e-04
Loss = 1.0774e-01, PNorm = 61.4482, GNorm = 0.8904, lr_0 = 4.7539e-04
Loss = 1.0613e-01, PNorm = 61.4612, GNorm = 0.9542, lr_0 = 4.7507e-04
Loss = 8.9618e-02, PNorm = 61.4714, GNorm = 0.6998, lr_0 = 4.7474e-04
Loss = 7.9492e-02, PNorm = 61.4854, GNorm = 0.5578, lr_0 = 4.7442e-04
Loss = 8.0771e-02, PNorm = 61.4991, GNorm = 0.5089, lr_0 = 4.7409e-04
Loss = 8.7201e-02, PNorm = 61.5149, GNorm = 0.8089, lr_0 = 4.7377e-04
Loss = 7.8566e-02, PNorm = 61.5293, GNorm = 1.0635, lr_0 = 4.7344e-04
Loss = 8.6468e-02, PNorm = 61.5392, GNorm = 0.6661, lr_0 = 4.7312e-04
Loss = 7.8166e-02, PNorm = 61.5478, GNorm = 1.4999, lr_0 = 4.7279e-04
Loss = 7.5266e-02, PNorm = 61.5551, GNorm = 0.5510, lr_0 = 4.7247e-04
Loss = 9.3597e-02, PNorm = 61.5682, GNorm = 1.2161, lr_0 = 4.7215e-04
Loss = 8.6115e-02, PNorm = 61.5786, GNorm = 0.7495, lr_0 = 4.7182e-04
Loss = 8.3114e-02, PNorm = 61.5906, GNorm = 0.6714, lr_0 = 4.7150e-04
Loss = 9.1661e-02, PNorm = 61.6012, GNorm = 0.6993, lr_0 = 4.7118e-04
Loss = 8.1714e-02, PNorm = 61.6143, GNorm = 0.4846, lr_0 = 4.7085e-04
Loss = 7.9741e-02, PNorm = 61.6235, GNorm = 0.6805, lr_0 = 4.7053e-04
Loss = 7.7521e-02, PNorm = 61.6342, GNorm = 0.8245, lr_0 = 4.7021e-04
Loss = 7.3177e-02, PNorm = 61.6447, GNorm = 0.6027, lr_0 = 4.6989e-04
Loss = 8.2443e-02, PNorm = 61.6553, GNorm = 0.5497, lr_0 = 4.6957e-04
Loss = 9.6192e-02, PNorm = 61.6626, GNorm = 0.7003, lr_0 = 4.6924e-04
Loss = 7.2395e-02, PNorm = 61.6700, GNorm = 0.6763, lr_0 = 4.6892e-04
Loss = 8.2467e-02, PNorm = 61.6829, GNorm = 0.8159, lr_0 = 4.6860e-04
Loss = 8.5129e-02, PNorm = 61.6961, GNorm = 1.0699, lr_0 = 4.6828e-04
Loss = 8.1656e-02, PNorm = 61.7064, GNorm = 0.6757, lr_0 = 4.6796e-04
Loss = 9.1429e-02, PNorm = 61.7205, GNorm = 0.6614, lr_0 = 4.6764e-04
Loss = 7.8154e-02, PNorm = 61.7304, GNorm = 0.9997, lr_0 = 4.6732e-04
Loss = 9.3942e-02, PNorm = 61.7429, GNorm = 0.5841, lr_0 = 4.6700e-04
Loss = 9.0389e-02, PNorm = 61.7559, GNorm = 0.5957, lr_0 = 4.6668e-04
Loss = 8.9211e-02, PNorm = 61.7674, GNorm = 0.5420, lr_0 = 4.6636e-04
Loss = 8.1834e-02, PNorm = 61.7791, GNorm = 0.6841, lr_0 = 4.6604e-04
Loss = 8.6199e-02, PNorm = 61.7865, GNorm = 0.4782, lr_0 = 4.6572e-04
Loss = 7.8850e-02, PNorm = 61.7951, GNorm = 0.5150, lr_0 = 4.6540e-04
Loss = 8.6423e-02, PNorm = 61.8032, GNorm = 1.2495, lr_0 = 4.6508e-04
Loss = 8.0273e-02, PNorm = 61.8140, GNorm = 0.6798, lr_0 = 4.6476e-04
Loss = 8.9319e-02, PNorm = 61.8244, GNorm = 0.6889, lr_0 = 4.6445e-04
Loss = 8.4321e-02, PNorm = 61.8355, GNorm = 0.5595, lr_0 = 4.6413e-04
Loss = 7.3526e-02, PNorm = 61.8490, GNorm = 0.9047, lr_0 = 4.6381e-04
Loss = 9.1691e-02, PNorm = 61.8647, GNorm = 0.9634, lr_0 = 4.6349e-04
Loss = 8.2888e-02, PNorm = 61.8804, GNorm = 0.5356, lr_0 = 4.6317e-04
Loss = 8.5085e-02, PNorm = 61.8936, GNorm = 0.5058, lr_0 = 4.6286e-04
Loss = 9.0179e-02, PNorm = 61.9033, GNorm = 1.9236, lr_0 = 4.6254e-04
Loss = 1.0538e-01, PNorm = 61.9129, GNorm = 1.3185, lr_0 = 4.6222e-04
Loss = 8.8468e-02, PNorm = 61.9258, GNorm = 0.5932, lr_0 = 4.6191e-04
Loss = 8.3137e-02, PNorm = 61.9371, GNorm = 0.5656, lr_0 = 4.6159e-04
Loss = 9.4355e-02, PNorm = 61.9479, GNorm = 0.6487, lr_0 = 4.6127e-04
Loss = 8.2785e-02, PNorm = 61.9588, GNorm = 0.9376, lr_0 = 4.6096e-04
Loss = 8.4519e-02, PNorm = 61.9701, GNorm = 0.8313, lr_0 = 4.6064e-04
Loss = 9.0551e-02, PNorm = 61.9836, GNorm = 0.7171, lr_0 = 4.6033e-04
Loss = 8.1220e-02, PNorm = 61.9979, GNorm = 1.2726, lr_0 = 4.6001e-04
Loss = 9.0375e-02, PNorm = 62.0106, GNorm = 0.6149, lr_0 = 4.5970e-04
Loss = 1.0129e-01, PNorm = 62.0221, GNorm = 0.8282, lr_0 = 4.5938e-04
Loss = 7.6240e-02, PNorm = 62.0312, GNorm = 0.5302, lr_0 = 4.5907e-04
Loss = 8.6992e-02, PNorm = 62.0457, GNorm = 0.6711, lr_0 = 4.5875e-04
Loss = 6.4199e-02, PNorm = 62.0560, GNorm = 0.5939, lr_0 = 4.5844e-04
Loss = 9.3241e-02, PNorm = 62.0651, GNorm = 0.5814, lr_0 = 4.5812e-04
Loss = 8.4586e-02, PNorm = 62.0743, GNorm = 1.0428, lr_0 = 4.5781e-04
Loss = 7.7362e-02, PNorm = 62.0803, GNorm = 0.8988, lr_0 = 4.5750e-04
Loss = 7.9564e-02, PNorm = 62.0864, GNorm = 0.5310, lr_0 = 4.5718e-04
Loss = 8.7073e-02, PNorm = 62.0938, GNorm = 0.5998, lr_0 = 4.5687e-04
Loss = 7.8979e-02, PNorm = 62.1034, GNorm = 0.5726, lr_0 = 4.5656e-04
Loss = 7.8109e-02, PNorm = 62.1130, GNorm = 0.7589, lr_0 = 4.5624e-04
Loss = 8.5198e-02, PNorm = 62.1221, GNorm = 0.6094, lr_0 = 4.5593e-04
Loss = 8.9729e-02, PNorm = 62.1336, GNorm = 0.5680, lr_0 = 4.5562e-04
Loss = 9.1190e-02, PNorm = 62.1480, GNorm = 0.6852, lr_0 = 4.5531e-04
Loss = 1.0425e-01, PNorm = 62.1635, GNorm = 0.6205, lr_0 = 4.5499e-04
Loss = 8.1911e-02, PNorm = 62.1769, GNorm = 0.4564, lr_0 = 4.5468e-04
Loss = 9.0055e-02, PNorm = 62.1883, GNorm = 0.7913, lr_0 = 4.5437e-04
Loss = 9.2124e-02, PNorm = 62.2010, GNorm = 1.0130, lr_0 = 4.5406e-04
Loss = 8.0948e-02, PNorm = 62.2159, GNorm = 0.6797, lr_0 = 4.5375e-04
Loss = 8.9550e-02, PNorm = 62.2262, GNorm = 0.7965, lr_0 = 4.5344e-04
Loss = 1.0577e-01, PNorm = 62.2393, GNorm = 1.2379, lr_0 = 4.5313e-04
Loss = 9.5505e-02, PNorm = 62.2554, GNorm = 1.3302, lr_0 = 4.5282e-04
Loss = 9.3712e-02, PNorm = 62.2654, GNorm = 0.6325, lr_0 = 4.5251e-04
Loss = 9.1721e-02, PNorm = 62.2732, GNorm = 0.9063, lr_0 = 4.5220e-04
Loss = 9.3084e-02, PNorm = 62.2854, GNorm = 0.5945, lr_0 = 4.5189e-04
Loss = 8.5568e-02, PNorm = 62.2989, GNorm = 0.5241, lr_0 = 4.5158e-04
Loss = 7.8410e-02, PNorm = 62.3097, GNorm = 0.5108, lr_0 = 4.5127e-04
Loss = 8.4515e-02, PNorm = 62.3170, GNorm = 1.2894, lr_0 = 4.5096e-04
Loss = 8.6315e-02, PNorm = 62.3230, GNorm = 0.6588, lr_0 = 4.5065e-04
Loss = 9.8357e-02, PNorm = 62.3311, GNorm = 1.3993, lr_0 = 4.5034e-04
Loss = 8.5066e-02, PNorm = 62.3432, GNorm = 0.8303, lr_0 = 4.5003e-04
Loss = 8.4298e-02, PNorm = 62.3568, GNorm = 1.2800, lr_0 = 4.4972e-04
Loss = 9.9923e-02, PNorm = 62.3703, GNorm = 1.1515, lr_0 = 4.4942e-04
Loss = 8.4635e-02, PNorm = 62.3823, GNorm = 0.6508, lr_0 = 4.4911e-04
Loss = 9.5276e-02, PNorm = 62.3896, GNorm = 0.5634, lr_0 = 4.4880e-04
Loss = 1.0322e-01, PNorm = 62.3976, GNorm = 0.9690, lr_0 = 4.4849e-04
Loss = 1.0150e-01, PNorm = 62.4095, GNorm = 0.8047, lr_0 = 4.4819e-04
Loss = 9.9804e-02, PNorm = 62.4291, GNorm = 0.9093, lr_0 = 4.4788e-04
Loss = 9.5088e-02, PNorm = 62.4441, GNorm = 0.5626, lr_0 = 4.4757e-04
Loss = 8.7080e-02, PNorm = 62.4540, GNorm = 1.4032, lr_0 = 4.4727e-04
Loss = 8.5452e-02, PNorm = 62.4635, GNorm = 0.6218, lr_0 = 4.4696e-04
Loss = 9.6725e-02, PNorm = 62.4751, GNorm = 0.9586, lr_0 = 4.4665e-04
Loss = 8.3138e-02, PNorm = 62.4858, GNorm = 0.5843, lr_0 = 4.4635e-04
Loss = 9.6417e-02, PNorm = 62.4937, GNorm = 0.8913, lr_0 = 4.4604e-04
Loss = 9.2383e-02, PNorm = 62.5014, GNorm = 0.6901, lr_0 = 4.4574e-04
Loss = 9.3323e-02, PNorm = 62.5143, GNorm = 0.5079, lr_0 = 4.4543e-04
Loss = 1.0139e-01, PNorm = 62.5283, GNorm = 0.6254, lr_0 = 4.4513e-04
Loss = 9.2383e-02, PNorm = 62.5402, GNorm = 1.3192, lr_0 = 4.4482e-04
Loss = 8.6474e-02, PNorm = 62.5504, GNorm = 0.4835, lr_0 = 4.4452e-04
Loss = 9.3459e-02, PNorm = 62.5605, GNorm = 0.9372, lr_0 = 4.4421e-04
Loss = 9.8705e-02, PNorm = 62.5744, GNorm = 0.7525, lr_0 = 4.4391e-04
Loss = 8.6546e-02, PNorm = 62.5830, GNorm = 0.6401, lr_0 = 4.4360e-04
Loss = 8.5350e-02, PNorm = 62.5909, GNorm = 0.6379, lr_0 = 4.4330e-04
Loss = 9.6110e-02, PNorm = 62.6001, GNorm = 1.1263, lr_0 = 4.4299e-04
Loss = 9.0305e-02, PNorm = 62.6083, GNorm = 0.5100, lr_0 = 4.4269e-04
Loss = 8.8562e-02, PNorm = 62.6165, GNorm = 1.3496, lr_0 = 4.4239e-04
Loss = 8.5932e-02, PNorm = 62.6242, GNorm = 0.6067, lr_0 = 4.4209e-04
Loss = 8.0081e-02, PNorm = 62.6342, GNorm = 0.5661, lr_0 = 4.4178e-04
Loss = 8.9426e-02, PNorm = 62.6447, GNorm = 1.1618, lr_0 = 4.4148e-04
Loss = 8.2747e-02, PNorm = 62.6571, GNorm = 0.7193, lr_0 = 4.4118e-04
Loss = 8.8583e-02, PNorm = 62.6683, GNorm = 0.7972, lr_0 = 4.4088e-04
Loss = 9.0372e-02, PNorm = 62.6772, GNorm = 1.0127, lr_0 = 4.4057e-04
Loss = 8.1385e-02, PNorm = 62.6849, GNorm = 0.8537, lr_0 = 4.4027e-04
Loss = 8.1801e-02, PNorm = 62.6943, GNorm = 0.7565, lr_0 = 4.3997e-04
Loss = 8.7902e-02, PNorm = 62.7016, GNorm = 0.7944, lr_0 = 4.3967e-04
Loss = 8.8425e-02, PNorm = 62.7085, GNorm = 0.5942, lr_0 = 4.3937e-04
Validation mae = 0.394621
Epoch 12
Loss = 7.6778e-02, PNorm = 62.7176, GNorm = 0.9681, lr_0 = 4.3907e-04
Loss = 6.7039e-02, PNorm = 62.7244, GNorm = 0.7707, lr_0 = 4.3877e-04
Loss = 8.1739e-02, PNorm = 62.7377, GNorm = 0.4451, lr_0 = 4.3846e-04
Loss = 6.9582e-02, PNorm = 62.7497, GNorm = 0.6131, lr_0 = 4.3816e-04
Loss = 8.3216e-02, PNorm = 62.7592, GNorm = 0.6969, lr_0 = 4.3786e-04
Loss = 7.4587e-02, PNorm = 62.7691, GNorm = 0.7468, lr_0 = 4.3756e-04
Loss = 8.0452e-02, PNorm = 62.7801, GNorm = 0.5803, lr_0 = 4.3726e-04
Loss = 7.2591e-02, PNorm = 62.7927, GNorm = 0.5226, lr_0 = 4.3696e-04
Loss = 7.8087e-02, PNorm = 62.8038, GNorm = 0.5873, lr_0 = 4.3667e-04
Loss = 7.2265e-02, PNorm = 62.8164, GNorm = 0.5870, lr_0 = 4.3637e-04
Loss = 7.6698e-02, PNorm = 62.8270, GNorm = 0.7212, lr_0 = 4.3607e-04
Loss = 7.9419e-02, PNorm = 62.8390, GNorm = 0.5362, lr_0 = 4.3577e-04
Loss = 7.0476e-02, PNorm = 62.8482, GNorm = 0.6000, lr_0 = 4.3547e-04
Loss = 8.9716e-02, PNorm = 62.8553, GNorm = 0.7296, lr_0 = 4.3517e-04
Loss = 8.3610e-02, PNorm = 62.8645, GNorm = 1.0751, lr_0 = 4.3487e-04
Loss = 7.3656e-02, PNorm = 62.8768, GNorm = 0.5332, lr_0 = 4.3458e-04
Loss = 7.0759e-02, PNorm = 62.8879, GNorm = 0.8964, lr_0 = 4.3428e-04
Loss = 8.3915e-02, PNorm = 62.8989, GNorm = 0.6348, lr_0 = 4.3398e-04
Loss = 7.8717e-02, PNorm = 62.9117, GNorm = 0.7446, lr_0 = 4.3368e-04
Loss = 7.9738e-02, PNorm = 62.9216, GNorm = 0.8494, lr_0 = 4.3339e-04
Loss = 7.9077e-02, PNorm = 62.9288, GNorm = 0.4555, lr_0 = 4.3309e-04
Loss = 7.9208e-02, PNorm = 62.9401, GNorm = 0.7737, lr_0 = 4.3279e-04
Loss = 8.0586e-02, PNorm = 62.9484, GNorm = 0.5455, lr_0 = 4.3250e-04
Loss = 7.3059e-02, PNorm = 62.9580, GNorm = 0.7060, lr_0 = 4.3220e-04
Loss = 8.9624e-02, PNorm = 62.9672, GNorm = 0.4990, lr_0 = 4.3190e-04
Loss = 7.5320e-02, PNorm = 62.9816, GNorm = 0.4655, lr_0 = 4.3161e-04
Loss = 7.5237e-02, PNorm = 62.9926, GNorm = 0.6118, lr_0 = 4.3131e-04
Loss = 8.2888e-02, PNorm = 63.0024, GNorm = 0.8030, lr_0 = 4.3102e-04
Loss = 7.8345e-02, PNorm = 63.0124, GNorm = 0.5400, lr_0 = 4.3072e-04
Loss = 6.0166e-02, PNorm = 63.0183, GNorm = 0.5080, lr_0 = 4.3043e-04
Loss = 7.9171e-02, PNorm = 63.0236, GNorm = 0.7977, lr_0 = 4.3013e-04
Loss = 7.0545e-02, PNorm = 63.0316, GNorm = 0.4088, lr_0 = 4.2984e-04
Loss = 7.1576e-02, PNorm = 63.0382, GNorm = 0.7189, lr_0 = 4.2954e-04
Loss = 7.6652e-02, PNorm = 63.0445, GNorm = 0.5784, lr_0 = 4.2925e-04
Loss = 7.6947e-02, PNorm = 63.0543, GNorm = 0.6794, lr_0 = 4.2895e-04
Loss = 7.9814e-02, PNorm = 63.0639, GNorm = 0.5426, lr_0 = 4.2866e-04
Loss = 8.0665e-02, PNorm = 63.0746, GNorm = 0.7647, lr_0 = 4.2837e-04
Loss = 8.0590e-02, PNorm = 63.0851, GNorm = 0.4728, lr_0 = 4.2807e-04
Loss = 8.4135e-02, PNorm = 63.0923, GNorm = 1.0777, lr_0 = 4.2778e-04
Loss = 7.9283e-02, PNorm = 63.0981, GNorm = 0.9360, lr_0 = 4.2749e-04
Loss = 8.0641e-02, PNorm = 63.1106, GNorm = 1.0787, lr_0 = 4.2719e-04
Loss = 9.4406e-02, PNorm = 63.1198, GNorm = 0.5758, lr_0 = 4.2690e-04
Loss = 7.8279e-02, PNorm = 63.1302, GNorm = 0.5761, lr_0 = 4.2661e-04
Loss = 8.3698e-02, PNorm = 63.1406, GNorm = 0.5048, lr_0 = 4.2632e-04
Loss = 9.2482e-02, PNorm = 63.1478, GNorm = 0.5616, lr_0 = 4.2602e-04
Loss = 7.6887e-02, PNorm = 63.1592, GNorm = 0.5344, lr_0 = 4.2573e-04
Loss = 7.8382e-02, PNorm = 63.1708, GNorm = 0.7321, lr_0 = 4.2544e-04
Loss = 8.3231e-02, PNorm = 63.1782, GNorm = 0.4450, lr_0 = 4.2515e-04
Loss = 9.4579e-02, PNorm = 63.1888, GNorm = 0.6038, lr_0 = 4.2486e-04
Loss = 8.4520e-02, PNorm = 63.1969, GNorm = 0.3353, lr_0 = 4.2457e-04
Loss = 8.8862e-02, PNorm = 63.2098, GNorm = 0.9397, lr_0 = 4.2428e-04
Loss = 6.8730e-02, PNorm = 63.2204, GNorm = 0.5329, lr_0 = 4.2399e-04
Loss = 8.8348e-02, PNorm = 63.2307, GNorm = 0.7581, lr_0 = 4.2370e-04
Loss = 8.3553e-02, PNorm = 63.2390, GNorm = 0.7208, lr_0 = 4.2340e-04
Loss = 8.0112e-02, PNorm = 63.2481, GNorm = 0.5274, lr_0 = 4.2311e-04
Loss = 7.2326e-02, PNorm = 63.2569, GNorm = 0.7729, lr_0 = 4.2283e-04
Loss = 7.3537e-02, PNorm = 63.2672, GNorm = 0.5919, lr_0 = 4.2254e-04
Loss = 7.9267e-02, PNorm = 63.2816, GNorm = 0.5266, lr_0 = 4.2225e-04
Loss = 9.9123e-02, PNorm = 63.2939, GNorm = 0.7184, lr_0 = 4.2196e-04
Loss = 7.8652e-02, PNorm = 63.3002, GNorm = 0.8363, lr_0 = 4.2167e-04
Loss = 7.5668e-02, PNorm = 63.3047, GNorm = 0.6608, lr_0 = 4.2138e-04
Loss = 7.6323e-02, PNorm = 63.3181, GNorm = 0.5420, lr_0 = 4.2109e-04
Loss = 7.9948e-02, PNorm = 63.3263, GNorm = 0.7793, lr_0 = 4.2080e-04
Loss = 7.8316e-02, PNorm = 63.3360, GNorm = 0.5969, lr_0 = 4.2051e-04
Loss = 7.9054e-02, PNorm = 63.3445, GNorm = 0.5096, lr_0 = 4.2023e-04
Loss = 7.2799e-02, PNorm = 63.3532, GNorm = 0.7967, lr_0 = 4.1994e-04
Loss = 8.0027e-02, PNorm = 63.3622, GNorm = 1.0735, lr_0 = 4.1965e-04
Loss = 7.6215e-02, PNorm = 63.3706, GNorm = 0.5510, lr_0 = 4.1936e-04
Loss = 9.0690e-02, PNorm = 63.3777, GNorm = 0.6104, lr_0 = 4.1907e-04
Loss = 7.1170e-02, PNorm = 63.3829, GNorm = 1.4609, lr_0 = 4.1879e-04
Loss = 8.5804e-02, PNorm = 63.3924, GNorm = 0.6384, lr_0 = 4.1850e-04
Loss = 7.9476e-02, PNorm = 63.4034, GNorm = 0.5008, lr_0 = 4.1821e-04
Loss = 8.3768e-02, PNorm = 63.4099, GNorm = 0.6703, lr_0 = 4.1793e-04
Loss = 9.5361e-02, PNorm = 63.4151, GNorm = 0.9348, lr_0 = 4.1764e-04
Loss = 7.4286e-02, PNorm = 63.4247, GNorm = 0.4688, lr_0 = 4.1736e-04
Loss = 7.6309e-02, PNorm = 63.4337, GNorm = 0.6655, lr_0 = 4.1707e-04
Loss = 8.4970e-02, PNorm = 63.4454, GNorm = 0.6475, lr_0 = 4.1678e-04
Loss = 9.6098e-02, PNorm = 63.4539, GNorm = 0.6043, lr_0 = 4.1650e-04
Loss = 8.5016e-02, PNorm = 63.4636, GNorm = 1.1224, lr_0 = 4.1621e-04
Loss = 8.7726e-02, PNorm = 63.4747, GNorm = 1.1774, lr_0 = 4.1593e-04
Loss = 7.7411e-02, PNorm = 63.4849, GNorm = 0.6301, lr_0 = 4.1564e-04
Loss = 8.6114e-02, PNorm = 63.4983, GNorm = 0.6460, lr_0 = 4.1536e-04
Loss = 7.3237e-02, PNorm = 63.5088, GNorm = 0.5716, lr_0 = 4.1507e-04
Loss = 7.3179e-02, PNorm = 63.5155, GNorm = 0.5070, lr_0 = 4.1479e-04
Loss = 7.3158e-02, PNorm = 63.5226, GNorm = 0.5557, lr_0 = 4.1450e-04
Loss = 9.0988e-02, PNorm = 63.5306, GNorm = 0.5340, lr_0 = 4.1422e-04
Loss = 7.6766e-02, PNorm = 63.5386, GNorm = 0.5873, lr_0 = 4.1394e-04
Loss = 8.0366e-02, PNorm = 63.5522, GNorm = 0.4711, lr_0 = 4.1365e-04
Loss = 7.6478e-02, PNorm = 63.5624, GNorm = 1.1492, lr_0 = 4.1337e-04
Loss = 7.3107e-02, PNorm = 63.5710, GNorm = 1.0860, lr_0 = 4.1309e-04
Loss = 9.4095e-02, PNorm = 63.5787, GNorm = 0.9102, lr_0 = 4.1280e-04
Loss = 8.5724e-02, PNorm = 63.5886, GNorm = 0.7621, lr_0 = 4.1252e-04
Loss = 8.6554e-02, PNorm = 63.5987, GNorm = 0.4925, lr_0 = 4.1224e-04
Loss = 7.8411e-02, PNorm = 63.6097, GNorm = 0.5989, lr_0 = 4.1196e-04
Loss = 7.4130e-02, PNorm = 63.6200, GNorm = 0.5643, lr_0 = 4.1167e-04
Loss = 6.8794e-02, PNorm = 63.6257, GNorm = 0.7060, lr_0 = 4.1139e-04
Loss = 9.0152e-02, PNorm = 63.6367, GNorm = 1.0788, lr_0 = 4.1111e-04
Loss = 8.1872e-02, PNorm = 63.6494, GNorm = 0.5542, lr_0 = 4.1083e-04
Loss = 8.0122e-02, PNorm = 63.6513, GNorm = 0.5977, lr_0 = 4.1055e-04
Loss = 7.6717e-02, PNorm = 63.6553, GNorm = 0.6691, lr_0 = 4.1027e-04
Loss = 7.2599e-02, PNorm = 63.6631, GNorm = 0.8383, lr_0 = 4.0998e-04
Loss = 6.4772e-02, PNorm = 63.6717, GNorm = 0.4777, lr_0 = 4.0970e-04
Loss = 7.6919e-02, PNorm = 63.6813, GNorm = 0.7429, lr_0 = 4.0942e-04
Loss = 7.7508e-02, PNorm = 63.6878, GNorm = 0.5333, lr_0 = 4.0914e-04
Loss = 8.1010e-02, PNorm = 63.6985, GNorm = 0.7657, lr_0 = 4.0886e-04
Loss = 8.6600e-02, PNorm = 63.7095, GNorm = 1.0253, lr_0 = 4.0858e-04
Loss = 9.0043e-02, PNorm = 63.7180, GNorm = 0.8608, lr_0 = 4.0830e-04
Loss = 8.0280e-02, PNorm = 63.7274, GNorm = 0.6051, lr_0 = 4.0802e-04
Loss = 8.7580e-02, PNorm = 63.7381, GNorm = 0.5926, lr_0 = 4.0774e-04
Loss = 8.7205e-02, PNorm = 63.7497, GNorm = 0.5735, lr_0 = 4.0746e-04
Loss = 7.6322e-02, PNorm = 63.7559, GNorm = 0.5385, lr_0 = 4.0718e-04
Loss = 7.7228e-02, PNorm = 63.7633, GNorm = 0.4804, lr_0 = 4.0691e-04
Loss = 7.1851e-02, PNorm = 63.7711, GNorm = 0.5176, lr_0 = 4.0663e-04
Loss = 6.7325e-02, PNorm = 63.7781, GNorm = 0.4627, lr_0 = 4.0635e-04
Loss = 8.6589e-02, PNorm = 63.7903, GNorm = 1.0143, lr_0 = 4.0607e-04
Loss = 8.3489e-02, PNorm = 63.7980, GNorm = 0.8635, lr_0 = 4.0579e-04
Loss = 7.8070e-02, PNorm = 63.8064, GNorm = 0.3963, lr_0 = 4.0551e-04
Loss = 7.7374e-02, PNorm = 63.8156, GNorm = 0.4348, lr_0 = 4.0524e-04
Loss = 7.8786e-02, PNorm = 63.8247, GNorm = 0.5957, lr_0 = 4.0496e-04
Loss = 8.4487e-02, PNorm = 63.8333, GNorm = 0.7153, lr_0 = 4.0468e-04
Validation mae = 0.388538
Epoch 13
Loss = 6.9159e-02, PNorm = 63.8408, GNorm = 0.4638, lr_0 = 4.0440e-04
Loss = 7.6543e-02, PNorm = 63.8522, GNorm = 0.4822, lr_0 = 4.0413e-04
Loss = 6.8662e-02, PNorm = 63.8627, GNorm = 0.6015, lr_0 = 4.0385e-04
Loss = 8.0919e-02, PNorm = 63.8689, GNorm = 0.7931, lr_0 = 4.0357e-04
Loss = 7.7983e-02, PNorm = 63.8756, GNorm = 0.6972, lr_0 = 4.0330e-04
Loss = 6.8424e-02, PNorm = 63.8855, GNorm = 0.6665, lr_0 = 4.0302e-04
Loss = 6.9726e-02, PNorm = 63.8957, GNorm = 0.7674, lr_0 = 4.0274e-04
Loss = 6.8494e-02, PNorm = 63.9051, GNorm = 0.6031, lr_0 = 4.0247e-04
Loss = 6.0840e-02, PNorm = 63.9153, GNorm = 0.6664, lr_0 = 4.0219e-04
Loss = 6.8158e-02, PNorm = 63.9212, GNorm = 0.4954, lr_0 = 4.0192e-04
Loss = 7.1688e-02, PNorm = 63.9302, GNorm = 0.9569, lr_0 = 4.0164e-04
Loss = 7.8083e-02, PNorm = 63.9379, GNorm = 0.4005, lr_0 = 4.0137e-04
Loss = 7.6081e-02, PNorm = 63.9482, GNorm = 0.9363, lr_0 = 4.0109e-04
Loss = 7.7838e-02, PNorm = 63.9598, GNorm = 1.2965, lr_0 = 4.0082e-04
Loss = 6.9570e-02, PNorm = 63.9721, GNorm = 0.7388, lr_0 = 4.0054e-04
Loss = 7.8528e-02, PNorm = 63.9840, GNorm = 0.4638, lr_0 = 4.0027e-04
Loss = 6.8897e-02, PNorm = 63.9928, GNorm = 0.5705, lr_0 = 3.9999e-04
Loss = 7.0339e-02, PNorm = 64.0002, GNorm = 0.5714, lr_0 = 3.9972e-04
Loss = 7.9856e-02, PNorm = 64.0070, GNorm = 0.6893, lr_0 = 3.9945e-04
Loss = 7.8065e-02, PNorm = 64.0143, GNorm = 0.8234, lr_0 = 3.9917e-04
Loss = 7.4364e-02, PNorm = 64.0254, GNorm = 0.4718, lr_0 = 3.9890e-04
Loss = 7.8193e-02, PNorm = 64.0365, GNorm = 0.5270, lr_0 = 3.9863e-04
Loss = 7.3710e-02, PNorm = 64.0430, GNorm = 0.6260, lr_0 = 3.9835e-04
Loss = 6.6111e-02, PNorm = 64.0508, GNorm = 0.5806, lr_0 = 3.9808e-04
Loss = 7.2697e-02, PNorm = 64.0623, GNorm = 0.7261, lr_0 = 3.9781e-04
Loss = 6.8239e-02, PNorm = 64.0729, GNorm = 0.4617, lr_0 = 3.9753e-04
Loss = 7.5800e-02, PNorm = 64.0816, GNorm = 0.5345, lr_0 = 3.9726e-04
Loss = 6.8966e-02, PNorm = 64.0904, GNorm = 0.6651, lr_0 = 3.9699e-04
Loss = 7.5403e-02, PNorm = 64.0968, GNorm = 0.6905, lr_0 = 3.9672e-04
Loss = 7.2629e-02, PNorm = 64.1043, GNorm = 0.6020, lr_0 = 3.9645e-04
Loss = 7.1562e-02, PNorm = 64.1136, GNorm = 0.5538, lr_0 = 3.9617e-04
Loss = 6.9198e-02, PNorm = 64.1224, GNorm = 0.8191, lr_0 = 3.9590e-04
Loss = 6.7601e-02, PNorm = 64.1305, GNorm = 0.9668, lr_0 = 3.9563e-04
Loss = 7.8011e-02, PNorm = 64.1391, GNorm = 1.5734, lr_0 = 3.9536e-04
Loss = 6.7974e-02, PNorm = 64.1510, GNorm = 0.9808, lr_0 = 3.9509e-04
Loss = 8.2971e-02, PNorm = 64.1623, GNorm = 0.6496, lr_0 = 3.9482e-04
Loss = 6.9117e-02, PNorm = 64.1734, GNorm = 0.5230, lr_0 = 3.9455e-04
Loss = 6.5084e-02, PNorm = 64.1837, GNorm = 0.6628, lr_0 = 3.9428e-04
Loss = 7.3841e-02, PNorm = 64.1942, GNorm = 0.4557, lr_0 = 3.9401e-04
Loss = 7.5370e-02, PNorm = 64.2023, GNorm = 0.5239, lr_0 = 3.9374e-04
Loss = 7.5897e-02, PNorm = 64.2086, GNorm = 0.6543, lr_0 = 3.9347e-04
Loss = 7.5457e-02, PNorm = 64.2154, GNorm = 0.6112, lr_0 = 3.9320e-04
Loss = 7.0461e-02, PNorm = 64.2236, GNorm = 0.7876, lr_0 = 3.9293e-04
Loss = 7.3378e-02, PNorm = 64.2317, GNorm = 0.8087, lr_0 = 3.9266e-04
Loss = 8.4612e-02, PNorm = 64.2412, GNorm = 0.4999, lr_0 = 3.9239e-04
Loss = 8.5858e-02, PNorm = 64.2492, GNorm = 0.9322, lr_0 = 3.9212e-04
Loss = 7.3469e-02, PNorm = 64.2610, GNorm = 0.7982, lr_0 = 3.9185e-04
Loss = 8.0107e-02, PNorm = 64.2712, GNorm = 0.5052, lr_0 = 3.9159e-04
Loss = 7.9205e-02, PNorm = 64.2802, GNorm = 0.5415, lr_0 = 3.9132e-04
Loss = 9.1141e-02, PNorm = 64.2929, GNorm = 0.8808, lr_0 = 3.9105e-04
Loss = 7.5339e-02, PNorm = 64.3043, GNorm = 0.6230, lr_0 = 3.9078e-04
Loss = 6.7173e-02, PNorm = 64.3143, GNorm = 0.6502, lr_0 = 3.9051e-04
Loss = 7.0870e-02, PNorm = 64.3216, GNorm = 0.8465, lr_0 = 3.9025e-04
Loss = 6.9837e-02, PNorm = 64.3326, GNorm = 1.1346, lr_0 = 3.8998e-04
Loss = 7.0141e-02, PNorm = 64.3431, GNorm = 1.0147, lr_0 = 3.8971e-04
Loss = 7.0656e-02, PNorm = 64.3520, GNorm = 0.7524, lr_0 = 3.8945e-04
Loss = 9.0363e-02, PNorm = 64.3592, GNorm = 0.5014, lr_0 = 3.8918e-04
Loss = 8.3063e-02, PNorm = 64.3701, GNorm = 0.6508, lr_0 = 3.8891e-04
Loss = 8.2005e-02, PNorm = 64.3827, GNorm = 0.5612, lr_0 = 3.8865e-04
Loss = 7.3051e-02, PNorm = 64.3915, GNorm = 0.6475, lr_0 = 3.8838e-04
Loss = 7.9775e-02, PNorm = 64.3995, GNorm = 0.4773, lr_0 = 3.8811e-04
Loss = 7.1702e-02, PNorm = 64.4070, GNorm = 0.5464, lr_0 = 3.8785e-04
Loss = 8.1896e-02, PNorm = 64.4128, GNorm = 0.6036, lr_0 = 3.8758e-04
Loss = 7.5505e-02, PNorm = 64.4207, GNorm = 0.6428, lr_0 = 3.8732e-04
Loss = 6.6862e-02, PNorm = 64.4260, GNorm = 0.5839, lr_0 = 3.8705e-04
Loss = 7.0117e-02, PNorm = 64.4352, GNorm = 0.9249, lr_0 = 3.8679e-04
Loss = 9.1447e-02, PNorm = 64.4481, GNorm = 0.5960, lr_0 = 3.8652e-04
Loss = 8.1922e-02, PNorm = 64.4637, GNorm = 0.5898, lr_0 = 3.8626e-04
Loss = 6.8011e-02, PNorm = 64.4730, GNorm = 0.8270, lr_0 = 3.8599e-04
Loss = 7.3744e-02, PNorm = 64.4801, GNorm = 0.6684, lr_0 = 3.8573e-04
Loss = 7.7515e-02, PNorm = 64.4899, GNorm = 0.7406, lr_0 = 3.8546e-04
Loss = 7.9204e-02, PNorm = 64.4992, GNorm = 0.7387, lr_0 = 3.8520e-04
Loss = 8.0499e-02, PNorm = 64.5032, GNorm = 0.5748, lr_0 = 3.8493e-04
Loss = 6.8647e-02, PNorm = 64.5091, GNorm = 0.7977, lr_0 = 3.8467e-04
Loss = 7.2996e-02, PNorm = 64.5196, GNorm = 0.5340, lr_0 = 3.8441e-04
Loss = 6.9121e-02, PNorm = 64.5320, GNorm = 0.9099, lr_0 = 3.8414e-04
Loss = 6.9571e-02, PNorm = 64.5410, GNorm = 0.9763, lr_0 = 3.8388e-04
Loss = 7.7071e-02, PNorm = 64.5514, GNorm = 1.1610, lr_0 = 3.8362e-04
Loss = 8.1610e-02, PNorm = 64.5583, GNorm = 0.6044, lr_0 = 3.8336e-04
Loss = 6.5361e-02, PNorm = 64.5661, GNorm = 0.4539, lr_0 = 3.8309e-04
Loss = 7.8682e-02, PNorm = 64.5733, GNorm = 1.0199, lr_0 = 3.8283e-04
Loss = 7.4790e-02, PNorm = 64.5820, GNorm = 0.5274, lr_0 = 3.8257e-04
Loss = 8.0574e-02, PNorm = 64.5854, GNorm = 0.6120, lr_0 = 3.8231e-04
Loss = 6.9660e-02, PNorm = 64.5901, GNorm = 0.6562, lr_0 = 3.8204e-04
Loss = 7.1209e-02, PNorm = 64.5975, GNorm = 0.4392, lr_0 = 3.8178e-04
Loss = 7.2268e-02, PNorm = 64.6055, GNorm = 0.9430, lr_0 = 3.8152e-04
Loss = 7.7923e-02, PNorm = 64.6153, GNorm = 0.6361, lr_0 = 3.8126e-04
Loss = 9.1375e-02, PNorm = 64.6238, GNorm = 1.0225, lr_0 = 3.8100e-04
Loss = 9.2185e-02, PNorm = 64.6336, GNorm = 0.7594, lr_0 = 3.8074e-04
Loss = 8.0563e-02, PNorm = 64.6387, GNorm = 0.6724, lr_0 = 3.8048e-04
Loss = 8.2418e-02, PNorm = 64.6500, GNorm = 0.7490, lr_0 = 3.8022e-04
Loss = 8.1186e-02, PNorm = 64.6646, GNorm = 1.3216, lr_0 = 3.7995e-04
Loss = 7.7839e-02, PNorm = 64.6779, GNorm = 0.6337, lr_0 = 3.7969e-04
Loss = 7.8016e-02, PNorm = 64.6874, GNorm = 0.5849, lr_0 = 3.7943e-04
Loss = 7.0889e-02, PNorm = 64.6964, GNorm = 0.5473, lr_0 = 3.7917e-04
Loss = 8.1554e-02, PNorm = 64.7035, GNorm = 0.4638, lr_0 = 3.7891e-04
Loss = 8.3621e-02, PNorm = 64.7107, GNorm = 0.5983, lr_0 = 3.7866e-04
Loss = 7.4621e-02, PNorm = 64.7185, GNorm = 0.7141, lr_0 = 3.7840e-04
Loss = 7.1975e-02, PNorm = 64.7244, GNorm = 0.6681, lr_0 = 3.7814e-04
Loss = 7.8358e-02, PNorm = 64.7296, GNorm = 0.6356, lr_0 = 3.7788e-04
Loss = 8.1896e-02, PNorm = 64.7408, GNorm = 0.7577, lr_0 = 3.7762e-04
Loss = 7.1277e-02, PNorm = 64.7511, GNorm = 0.4369, lr_0 = 3.7736e-04
Loss = 9.7299e-02, PNorm = 64.7601, GNorm = 0.5504, lr_0 = 3.7710e-04
Loss = 5.9976e-02, PNorm = 64.7686, GNorm = 0.6205, lr_0 = 3.7684e-04
Loss = 8.0646e-02, PNorm = 64.7755, GNorm = 0.6447, lr_0 = 3.7659e-04
Loss = 7.5239e-02, PNorm = 64.7824, GNorm = 0.5454, lr_0 = 3.7633e-04
Loss = 6.4709e-02, PNorm = 64.7892, GNorm = 0.5282, lr_0 = 3.7607e-04
Loss = 7.3358e-02, PNorm = 64.7949, GNorm = 0.6564, lr_0 = 3.7581e-04
Loss = 8.2671e-02, PNorm = 64.8020, GNorm = 0.6474, lr_0 = 3.7555e-04
Loss = 6.9879e-02, PNorm = 64.8099, GNorm = 0.6943, lr_0 = 3.7530e-04
Loss = 7.5084e-02, PNorm = 64.8156, GNorm = 0.4661, lr_0 = 3.7504e-04
Loss = 8.1662e-02, PNorm = 64.8231, GNorm = 1.0030, lr_0 = 3.7478e-04
Loss = 7.5521e-02, PNorm = 64.8324, GNorm = 0.6916, lr_0 = 3.7453e-04
Loss = 8.1205e-02, PNorm = 64.8435, GNorm = 0.4709, lr_0 = 3.7427e-04
Loss = 8.0641e-02, PNorm = 64.8508, GNorm = 1.1467, lr_0 = 3.7401e-04
Loss = 7.7097e-02, PNorm = 64.8550, GNorm = 0.9177, lr_0 = 3.7376e-04
Loss = 8.1272e-02, PNorm = 64.8581, GNorm = 0.9810, lr_0 = 3.7350e-04
Loss = 8.3323e-02, PNorm = 64.8678, GNorm = 0.7948, lr_0 = 3.7325e-04
Loss = 7.5217e-02, PNorm = 64.8788, GNorm = 0.4790, lr_0 = 3.7299e-04
Loss = 7.0615e-02, PNorm = 64.8895, GNorm = 0.4682, lr_0 = 3.7273e-04
Validation mae = 0.392252
Epoch 14
Loss = 6.0388e-02, PNorm = 64.8979, GNorm = 0.3606, lr_0 = 3.7248e-04
Loss = 7.4119e-02, PNorm = 64.9046, GNorm = 0.4494, lr_0 = 3.7222e-04
Loss = 5.9591e-02, PNorm = 64.9146, GNorm = 0.7502, lr_0 = 3.7197e-04
Loss = 5.8894e-02, PNorm = 64.9207, GNorm = 0.6641, lr_0 = 3.7171e-04
Loss = 6.7468e-02, PNorm = 64.9253, GNorm = 0.6512, lr_0 = 3.7146e-04
Loss = 6.3440e-02, PNorm = 64.9318, GNorm = 1.0299, lr_0 = 3.7120e-04
Loss = 7.2559e-02, PNorm = 64.9388, GNorm = 0.5923, lr_0 = 3.7095e-04
Loss = 5.9625e-02, PNorm = 64.9456, GNorm = 0.7561, lr_0 = 3.7070e-04
Loss = 7.0387e-02, PNorm = 64.9536, GNorm = 0.6018, lr_0 = 3.7044e-04
Loss = 5.8096e-02, PNorm = 64.9583, GNorm = 0.6229, lr_0 = 3.7019e-04
Loss = 6.6096e-02, PNorm = 64.9685, GNorm = 0.5487, lr_0 = 3.6993e-04
Loss = 6.8205e-02, PNorm = 64.9863, GNorm = 0.6736, lr_0 = 3.6968e-04
Loss = 6.6924e-02, PNorm = 64.9967, GNorm = 0.6241, lr_0 = 3.6943e-04
Loss = 6.5454e-02, PNorm = 65.0048, GNorm = 0.4423, lr_0 = 3.6917e-04
Loss = 5.6036e-02, PNorm = 65.0139, GNorm = 0.5491, lr_0 = 3.6892e-04
Loss = 6.4945e-02, PNorm = 65.0207, GNorm = 0.6198, lr_0 = 3.6867e-04
Loss = 6.1613e-02, PNorm = 65.0297, GNorm = 0.6237, lr_0 = 3.6842e-04
Loss = 6.2862e-02, PNorm = 65.0366, GNorm = 0.7008, lr_0 = 3.6816e-04
Loss = 7.0763e-02, PNorm = 65.0430, GNorm = 0.6732, lr_0 = 3.6791e-04
Loss = 6.3365e-02, PNorm = 65.0475, GNorm = 0.7243, lr_0 = 3.6766e-04
Loss = 7.1713e-02, PNorm = 65.0562, GNorm = 0.5320, lr_0 = 3.6741e-04
Loss = 6.8728e-02, PNorm = 65.0665, GNorm = 0.6192, lr_0 = 3.6716e-04
Loss = 6.6254e-02, PNorm = 65.0745, GNorm = 1.1257, lr_0 = 3.6690e-04
Loss = 7.6598e-02, PNorm = 65.0797, GNorm = 0.4937, lr_0 = 3.6665e-04
Loss = 7.9275e-02, PNorm = 65.0876, GNorm = 0.5739, lr_0 = 3.6640e-04
Loss = 7.6668e-02, PNorm = 65.0952, GNorm = 1.1064, lr_0 = 3.6615e-04
Loss = 7.1303e-02, PNorm = 65.1013, GNorm = 0.7995, lr_0 = 3.6590e-04
Loss = 6.4031e-02, PNorm = 65.1078, GNorm = 0.5234, lr_0 = 3.6565e-04
Loss = 6.4988e-02, PNorm = 65.1167, GNorm = 0.6778, lr_0 = 3.6540e-04
Loss = 6.0963e-02, PNorm = 65.1232, GNorm = 0.4912, lr_0 = 3.6515e-04
Loss = 7.4279e-02, PNorm = 65.1309, GNorm = 0.7903, lr_0 = 3.6490e-04
Loss = 6.3717e-02, PNorm = 65.1424, GNorm = 0.4339, lr_0 = 3.6465e-04
Loss = 6.5475e-02, PNorm = 65.1500, GNorm = 0.5725, lr_0 = 3.6440e-04
Loss = 5.4676e-02, PNorm = 65.1560, GNorm = 0.4161, lr_0 = 3.6415e-04
Loss = 6.9023e-02, PNorm = 65.1609, GNorm = 0.5307, lr_0 = 3.6390e-04
Loss = 6.8338e-02, PNorm = 65.1646, GNorm = 0.7501, lr_0 = 3.6365e-04
Loss = 7.1529e-02, PNorm = 65.1697, GNorm = 0.8062, lr_0 = 3.6340e-04
Loss = 7.1576e-02, PNorm = 65.1756, GNorm = 0.6392, lr_0 = 3.6315e-04
Loss = 6.6364e-02, PNorm = 65.1830, GNorm = 0.6156, lr_0 = 3.6290e-04
Loss = 5.9029e-02, PNorm = 65.1898, GNorm = 0.5837, lr_0 = 3.6266e-04
Loss = 6.5447e-02, PNorm = 65.1959, GNorm = 0.7618, lr_0 = 3.6241e-04
Loss = 7.5586e-02, PNorm = 65.2063, GNorm = 1.3306, lr_0 = 3.6216e-04
Loss = 7.8899e-02, PNorm = 65.2180, GNorm = 0.5674, lr_0 = 3.6191e-04
Loss = 6.4106e-02, PNorm = 65.2299, GNorm = 0.8005, lr_0 = 3.6166e-04
Loss = 7.6086e-02, PNorm = 65.2412, GNorm = 0.5118, lr_0 = 3.6141e-04
Loss = 6.3479e-02, PNorm = 65.2521, GNorm = 0.9587, lr_0 = 3.6117e-04
Loss = 6.0578e-02, PNorm = 65.2592, GNorm = 0.5416, lr_0 = 3.6092e-04
Loss = 6.7518e-02, PNorm = 65.2625, GNorm = 0.5406, lr_0 = 3.6067e-04
Loss = 7.3079e-02, PNorm = 65.2705, GNorm = 0.5632, lr_0 = 3.6043e-04
Loss = 6.2238e-02, PNorm = 65.2788, GNorm = 0.4418, lr_0 = 3.6018e-04
Loss = 6.7749e-02, PNorm = 65.2863, GNorm = 0.8866, lr_0 = 3.5993e-04
Loss = 6.5714e-02, PNorm = 65.2941, GNorm = 0.5439, lr_0 = 3.5969e-04
Loss = 7.3572e-02, PNorm = 65.3034, GNorm = 0.6041, lr_0 = 3.5944e-04
Loss = 8.8856e-02, PNorm = 65.3166, GNorm = 0.6901, lr_0 = 3.5919e-04
Loss = 6.4644e-02, PNorm = 65.3250, GNorm = 0.7800, lr_0 = 3.5895e-04
Loss = 6.8582e-02, PNorm = 65.3295, GNorm = 0.5427, lr_0 = 3.5870e-04
Loss = 6.9630e-02, PNorm = 65.3385, GNorm = 0.6431, lr_0 = 3.5845e-04
Loss = 7.3748e-02, PNorm = 65.3442, GNorm = 0.6118, lr_0 = 3.5821e-04
Loss = 5.9463e-02, PNorm = 65.3496, GNorm = 0.7302, lr_0 = 3.5796e-04
Loss = 6.5268e-02, PNorm = 65.3575, GNorm = 0.6218, lr_0 = 3.5772e-04
Loss = 6.6656e-02, PNorm = 65.3640, GNorm = 0.6315, lr_0 = 3.5747e-04
Loss = 8.0375e-02, PNorm = 65.3707, GNorm = 0.7674, lr_0 = 3.5723e-04
Loss = 7.1838e-02, PNorm = 65.3739, GNorm = 0.8502, lr_0 = 3.5698e-04
Loss = 5.9927e-02, PNorm = 65.3786, GNorm = 0.4955, lr_0 = 3.5674e-04
Loss = 6.5709e-02, PNorm = 65.3828, GNorm = 0.5029, lr_0 = 3.5650e-04
Loss = 6.6011e-02, PNorm = 65.3922, GNorm = 0.4227, lr_0 = 3.5625e-04
Loss = 6.6704e-02, PNorm = 65.4003, GNorm = 0.6569, lr_0 = 3.5601e-04
Loss = 7.3403e-02, PNorm = 65.4094, GNorm = 0.5865, lr_0 = 3.5576e-04
Loss = 6.6734e-02, PNorm = 65.4162, GNorm = 0.9737, lr_0 = 3.5552e-04
Loss = 6.2778e-02, PNorm = 65.4202, GNorm = 0.5970, lr_0 = 3.5528e-04
Loss = 7.3601e-02, PNorm = 65.4270, GNorm = 0.4422, lr_0 = 3.5503e-04
Loss = 6.4789e-02, PNorm = 65.4354, GNorm = 0.5549, lr_0 = 3.5479e-04
Loss = 6.4928e-02, PNorm = 65.4440, GNorm = 0.4598, lr_0 = 3.5455e-04
Loss = 8.0043e-02, PNorm = 65.4518, GNorm = 0.5745, lr_0 = 3.5430e-04
Loss = 8.7460e-02, PNorm = 65.4646, GNorm = 1.1051, lr_0 = 3.5406e-04
Loss = 6.9766e-02, PNorm = 65.4740, GNorm = 0.5014, lr_0 = 3.5382e-04
Loss = 8.0350e-02, PNorm = 65.4790, GNorm = 0.6608, lr_0 = 3.5358e-04
Loss = 6.2658e-02, PNorm = 65.4860, GNorm = 0.6020, lr_0 = 3.5333e-04
Loss = 6.8838e-02, PNorm = 65.4934, GNorm = 0.6158, lr_0 = 3.5309e-04
Loss = 7.8012e-02, PNorm = 65.5003, GNorm = 0.9738, lr_0 = 3.5285e-04
Loss = 8.1623e-02, PNorm = 65.5119, GNorm = 0.7142, lr_0 = 3.5261e-04
Loss = 6.6008e-02, PNorm = 65.5187, GNorm = 0.7309, lr_0 = 3.5237e-04
Loss = 9.4869e-02, PNorm = 65.5243, GNorm = 1.2412, lr_0 = 3.5212e-04
Loss = 6.9797e-02, PNorm = 65.5332, GNorm = 0.6815, lr_0 = 3.5188e-04
Loss = 7.6584e-02, PNorm = 65.5410, GNorm = 0.6646, lr_0 = 3.5164e-04
Loss = 6.4267e-02, PNorm = 65.5505, GNorm = 0.7287, lr_0 = 3.5140e-04
Loss = 8.3624e-02, PNorm = 65.5590, GNorm = 1.1907, lr_0 = 3.5116e-04
Loss = 7.5258e-02, PNorm = 65.5682, GNorm = 0.7232, lr_0 = 3.5092e-04
Loss = 7.8986e-02, PNorm = 65.5744, GNorm = 0.5327, lr_0 = 3.5068e-04
Loss = 7.9869e-02, PNorm = 65.5822, GNorm = 0.4166, lr_0 = 3.5044e-04
Loss = 7.5134e-02, PNorm = 65.5877, GNorm = 0.5093, lr_0 = 3.5020e-04
Loss = 7.4889e-02, PNorm = 65.5945, GNorm = 1.2516, lr_0 = 3.4996e-04
Loss = 7.3192e-02, PNorm = 65.6018, GNorm = 0.4624, lr_0 = 3.4972e-04
Loss = 7.4254e-02, PNorm = 65.6099, GNorm = 1.4501, lr_0 = 3.4948e-04
Loss = 8.1848e-02, PNorm = 65.6199, GNorm = 1.2055, lr_0 = 3.4924e-04
Loss = 7.2345e-02, PNorm = 65.6278, GNorm = 0.5209, lr_0 = 3.4900e-04
Loss = 6.2941e-02, PNorm = 65.6355, GNorm = 0.6639, lr_0 = 3.4876e-04
Loss = 7.3886e-02, PNorm = 65.6440, GNorm = 0.7189, lr_0 = 3.4852e-04
Loss = 7.0415e-02, PNorm = 65.6531, GNorm = 0.6068, lr_0 = 3.4828e-04
Loss = 7.2623e-02, PNorm = 65.6612, GNorm = 0.6772, lr_0 = 3.4805e-04
Loss = 6.9418e-02, PNorm = 65.6700, GNorm = 0.9020, lr_0 = 3.4781e-04
Loss = 7.5857e-02, PNorm = 65.6742, GNorm = 0.4993, lr_0 = 3.4757e-04
Loss = 6.8951e-02, PNorm = 65.6809, GNorm = 0.5117, lr_0 = 3.4733e-04
Loss = 7.5512e-02, PNorm = 65.6891, GNorm = 0.8703, lr_0 = 3.4709e-04
Loss = 6.5997e-02, PNorm = 65.6981, GNorm = 0.5866, lr_0 = 3.4686e-04
Loss = 7.6500e-02, PNorm = 65.7051, GNorm = 0.6976, lr_0 = 3.4662e-04
Loss = 7.9873e-02, PNorm = 65.7134, GNorm = 0.6969, lr_0 = 3.4638e-04
Loss = 6.9942e-02, PNorm = 65.7232, GNorm = 0.4254, lr_0 = 3.4614e-04
Loss = 6.5267e-02, PNorm = 65.7297, GNorm = 0.6190, lr_0 = 3.4591e-04
Loss = 7.2006e-02, PNorm = 65.7341, GNorm = 0.8745, lr_0 = 3.4567e-04
Loss = 6.0572e-02, PNorm = 65.7374, GNorm = 0.7026, lr_0 = 3.4543e-04
Loss = 6.8172e-02, PNorm = 65.7421, GNorm = 0.6138, lr_0 = 3.4520e-04
Loss = 6.6318e-02, PNorm = 65.7498, GNorm = 0.7331, lr_0 = 3.4496e-04
Loss = 7.1037e-02, PNorm = 65.7590, GNorm = 0.4951, lr_0 = 3.4472e-04
Loss = 7.1934e-02, PNorm = 65.7624, GNorm = 0.5061, lr_0 = 3.4449e-04
Loss = 6.5277e-02, PNorm = 65.7628, GNorm = 0.7886, lr_0 = 3.4425e-04
Loss = 6.7021e-02, PNorm = 65.7698, GNorm = 0.9697, lr_0 = 3.4402e-04
Loss = 7.6156e-02, PNorm = 65.7776, GNorm = 1.1386, lr_0 = 3.4378e-04
Loss = 6.8238e-02, PNorm = 65.7818, GNorm = 0.4157, lr_0 = 3.4354e-04
Loss = 6.4587e-02, PNorm = 65.7873, GNorm = 0.6120, lr_0 = 3.4331e-04
Validation mae = 0.396604
Epoch 15
Loss = 6.3187e-02, PNorm = 65.7972, GNorm = 0.4502, lr_0 = 3.4307e-04
Loss = 5.9722e-02, PNorm = 65.8039, GNorm = 0.7158, lr_0 = 3.4284e-04
Loss = 6.2436e-02, PNorm = 65.8109, GNorm = 0.8540, lr_0 = 3.4260e-04
Loss = 6.1967e-02, PNorm = 65.8172, GNorm = 0.4403, lr_0 = 3.4237e-04
Loss = 6.0959e-02, PNorm = 65.8251, GNorm = 0.6161, lr_0 = 3.4213e-04
Loss = 6.6683e-02, PNorm = 65.8319, GNorm = 0.6179, lr_0 = 3.4190e-04
Loss = 5.5458e-02, PNorm = 65.8388, GNorm = 0.8551, lr_0 = 3.4167e-04
Loss = 7.0199e-02, PNorm = 65.8471, GNorm = 0.5512, lr_0 = 3.4143e-04
Loss = 5.4207e-02, PNorm = 65.8527, GNorm = 0.8523, lr_0 = 3.4120e-04
Loss = 6.9038e-02, PNorm = 65.8597, GNorm = 0.7147, lr_0 = 3.4096e-04
Loss = 6.3929e-02, PNorm = 65.8682, GNorm = 0.8324, lr_0 = 3.4073e-04
Loss = 7.1720e-02, PNorm = 65.8765, GNorm = 0.5084, lr_0 = 3.4050e-04
Loss = 6.9479e-02, PNorm = 65.8864, GNorm = 0.6218, lr_0 = 3.4026e-04
Loss = 5.9915e-02, PNorm = 65.8950, GNorm = 0.5129, lr_0 = 3.4003e-04
Loss = 5.7226e-02, PNorm = 65.9028, GNorm = 0.4678, lr_0 = 3.3980e-04
Loss = 6.3585e-02, PNorm = 65.9055, GNorm = 0.7664, lr_0 = 3.3956e-04
Loss = 5.3216e-02, PNorm = 65.9113, GNorm = 0.8759, lr_0 = 3.3933e-04
Loss = 6.0242e-02, PNorm = 65.9173, GNorm = 0.4940, lr_0 = 3.3910e-04
Loss = 6.6517e-02, PNorm = 65.9234, GNorm = 0.5576, lr_0 = 3.3887e-04
Loss = 7.3470e-02, PNorm = 65.9297, GNorm = 0.9931, lr_0 = 3.3864e-04
Loss = 7.4810e-02, PNorm = 65.9380, GNorm = 0.4301, lr_0 = 3.3840e-04
Loss = 5.9976e-02, PNorm = 65.9422, GNorm = 0.7598, lr_0 = 3.3817e-04
Loss = 5.9950e-02, PNorm = 65.9465, GNorm = 0.4578, lr_0 = 3.3794e-04
Loss = 6.0969e-02, PNorm = 65.9540, GNorm = 0.6920, lr_0 = 3.3771e-04
Loss = 5.9053e-02, PNorm = 65.9601, GNorm = 0.4340, lr_0 = 3.3748e-04
Loss = 6.1571e-02, PNorm = 65.9614, GNorm = 0.5952, lr_0 = 3.3725e-04
Loss = 7.3799e-02, PNorm = 65.9674, GNorm = 0.6022, lr_0 = 3.3701e-04
Loss = 6.6072e-02, PNorm = 65.9745, GNorm = 0.5760, lr_0 = 3.3678e-04
Loss = 5.5329e-02, PNorm = 65.9829, GNorm = 0.5987, lr_0 = 3.3655e-04
Loss = 6.4527e-02, PNorm = 65.9888, GNorm = 0.7253, lr_0 = 3.3632e-04
Loss = 6.0351e-02, PNorm = 65.9948, GNorm = 0.5198, lr_0 = 3.3609e-04
Loss = 7.0131e-02, PNorm = 66.0025, GNorm = 0.8582, lr_0 = 3.3586e-04
Loss = 6.0127e-02, PNorm = 66.0090, GNorm = 0.3988, lr_0 = 3.3563e-04
Loss = 6.1382e-02, PNorm = 66.0152, GNorm = 0.7964, lr_0 = 3.3540e-04
Loss = 5.6233e-02, PNorm = 66.0196, GNorm = 0.5332, lr_0 = 3.3517e-04
Loss = 5.9893e-02, PNorm = 66.0266, GNorm = 0.4892, lr_0 = 3.3494e-04
Loss = 5.8684e-02, PNorm = 66.0361, GNorm = 0.7027, lr_0 = 3.3471e-04
Loss = 6.6531e-02, PNorm = 66.0447, GNorm = 0.4789, lr_0 = 3.3448e-04
Loss = 6.6768e-02, PNorm = 66.0514, GNorm = 0.5433, lr_0 = 3.3425e-04
Loss = 7.1376e-02, PNorm = 66.0591, GNorm = 0.4479, lr_0 = 3.3403e-04
Loss = 6.7109e-02, PNorm = 66.0662, GNorm = 0.7330, lr_0 = 3.3380e-04
Loss = 6.2850e-02, PNorm = 66.0755, GNorm = 0.8297, lr_0 = 3.3357e-04
Loss = 7.1807e-02, PNorm = 66.0824, GNorm = 0.5924, lr_0 = 3.3334e-04
Loss = 7.2340e-02, PNorm = 66.0857, GNorm = 0.5832, lr_0 = 3.3311e-04
Loss = 7.4171e-02, PNorm = 66.0903, GNorm = 0.4715, lr_0 = 3.3288e-04
Loss = 6.0784e-02, PNorm = 66.0962, GNorm = 0.7463, lr_0 = 3.3265e-04
Loss = 6.2156e-02, PNorm = 66.1030, GNorm = 0.8012, lr_0 = 3.3243e-04
Loss = 5.9378e-02, PNorm = 66.1084, GNorm = 0.5432, lr_0 = 3.3220e-04
Loss = 6.3392e-02, PNorm = 66.1133, GNorm = 0.4719, lr_0 = 3.3197e-04
Loss = 6.1913e-02, PNorm = 66.1190, GNorm = 0.9863, lr_0 = 3.3174e-04
Loss = 7.2239e-02, PNorm = 66.1274, GNorm = 0.5633, lr_0 = 3.3152e-04
Loss = 5.9366e-02, PNorm = 66.1358, GNorm = 0.5248, lr_0 = 3.3129e-04
Loss = 5.6209e-02, PNorm = 66.1427, GNorm = 0.6540, lr_0 = 3.3106e-04
Loss = 6.6248e-02, PNorm = 66.1459, GNorm = 0.4409, lr_0 = 3.3084e-04
Loss = 7.3410e-02, PNorm = 66.1532, GNorm = 1.0480, lr_0 = 3.3061e-04
Loss = 6.2405e-02, PNorm = 66.1630, GNorm = 0.9038, lr_0 = 3.3038e-04
Loss = 6.1824e-02, PNorm = 66.1688, GNorm = 1.0113, lr_0 = 3.3016e-04
Loss = 5.9737e-02, PNorm = 66.1737, GNorm = 0.6418, lr_0 = 3.2993e-04
Loss = 6.5843e-02, PNorm = 66.1782, GNorm = 0.9103, lr_0 = 3.2970e-04
Loss = 7.1920e-02, PNorm = 66.1869, GNorm = 1.0999, lr_0 = 3.2948e-04
Loss = 6.4763e-02, PNorm = 66.1954, GNorm = 0.7154, lr_0 = 3.2925e-04
Loss = 6.2397e-02, PNorm = 66.2013, GNorm = 0.8078, lr_0 = 3.2903e-04
Loss = 7.3247e-02, PNorm = 66.2089, GNorm = 0.4463, lr_0 = 3.2880e-04
Loss = 7.1552e-02, PNorm = 66.2195, GNorm = 0.6349, lr_0 = 3.2858e-04
Loss = 6.9990e-02, PNorm = 66.2261, GNorm = 0.6131, lr_0 = 3.2835e-04
Loss = 6.7047e-02, PNorm = 66.2338, GNorm = 0.7837, lr_0 = 3.2813e-04
Loss = 6.5105e-02, PNorm = 66.2397, GNorm = 0.7028, lr_0 = 3.2790e-04
Loss = 6.4395e-02, PNorm = 66.2462, GNorm = 0.7600, lr_0 = 3.2768e-04
Loss = 5.7820e-02, PNorm = 66.2540, GNorm = 0.4345, lr_0 = 3.2745e-04
Loss = 7.1490e-02, PNorm = 66.2619, GNorm = 0.5387, lr_0 = 3.2723e-04
Loss = 6.1991e-02, PNorm = 66.2698, GNorm = 0.6895, lr_0 = 3.2700e-04
Loss = 7.2734e-02, PNorm = 66.2764, GNorm = 0.8877, lr_0 = 3.2678e-04
Loss = 7.0477e-02, PNorm = 66.2821, GNorm = 0.6807, lr_0 = 3.2656e-04
Loss = 7.7829e-02, PNorm = 66.2847, GNorm = 0.5696, lr_0 = 3.2633e-04
Loss = 6.9573e-02, PNorm = 66.2948, GNorm = 0.4766, lr_0 = 3.2611e-04
Loss = 7.2202e-02, PNorm = 66.3073, GNorm = 0.7198, lr_0 = 3.2589e-04
Loss = 6.3904e-02, PNorm = 66.3177, GNorm = 0.8364, lr_0 = 3.2566e-04
Loss = 5.8852e-02, PNorm = 66.3267, GNorm = 0.4929, lr_0 = 3.2544e-04
Loss = 6.8947e-02, PNorm = 66.3364, GNorm = 0.5604, lr_0 = 3.2522e-04
Loss = 5.9100e-02, PNorm = 66.3411, GNorm = 0.4642, lr_0 = 3.2499e-04
Loss = 5.4762e-02, PNorm = 66.3450, GNorm = 0.4723, lr_0 = 3.2477e-04
Loss = 6.4659e-02, PNorm = 66.3491, GNorm = 0.4996, lr_0 = 3.2455e-04
Loss = 6.3682e-02, PNorm = 66.3540, GNorm = 0.6042, lr_0 = 3.2433e-04
Loss = 6.8099e-02, PNorm = 66.3603, GNorm = 0.5996, lr_0 = 3.2410e-04
Loss = 7.3431e-02, PNorm = 66.3672, GNorm = 1.0268, lr_0 = 3.2388e-04
Loss = 7.5505e-02, PNorm = 66.3765, GNorm = 0.5014, lr_0 = 3.2366e-04
Loss = 6.4960e-02, PNorm = 66.3852, GNorm = 0.6946, lr_0 = 3.2344e-04
Loss = 7.0984e-02, PNorm = 66.3912, GNorm = 0.5636, lr_0 = 3.2322e-04
Loss = 6.8266e-02, PNorm = 66.3980, GNorm = 0.5913, lr_0 = 3.2300e-04
Loss = 6.3924e-02, PNorm = 66.4043, GNorm = 0.5818, lr_0 = 3.2277e-04
Loss = 7.5441e-02, PNorm = 66.4125, GNorm = 0.8743, lr_0 = 3.2255e-04
Loss = 7.7921e-02, PNorm = 66.4208, GNorm = 0.5666, lr_0 = 3.2233e-04
Loss = 6.3073e-02, PNorm = 66.4288, GNorm = 0.9954, lr_0 = 3.2211e-04
Loss = 7.2646e-02, PNorm = 66.4353, GNorm = 0.9031, lr_0 = 3.2189e-04
Loss = 6.7172e-02, PNorm = 66.4418, GNorm = 0.8662, lr_0 = 3.2167e-04
Loss = 6.4349e-02, PNorm = 66.4499, GNorm = 0.7266, lr_0 = 3.2145e-04
Loss = 7.5206e-02, PNorm = 66.4557, GNorm = 0.4489, lr_0 = 3.2123e-04
Loss = 5.9034e-02, PNorm = 66.4597, GNorm = 0.6444, lr_0 = 3.2101e-04
Loss = 6.7798e-02, PNorm = 66.4664, GNorm = 0.7819, lr_0 = 3.2079e-04
Loss = 6.5355e-02, PNorm = 66.4698, GNorm = 0.5203, lr_0 = 3.2057e-04
Loss = 6.4075e-02, PNorm = 66.4763, GNorm = 0.8543, lr_0 = 3.2035e-04
Loss = 6.4063e-02, PNorm = 66.4817, GNorm = 0.8355, lr_0 = 3.2013e-04
Loss = 7.0995e-02, PNorm = 66.4886, GNorm = 0.5156, lr_0 = 3.1991e-04
Loss = 6.1242e-02, PNorm = 66.4921, GNorm = 0.4603, lr_0 = 3.1969e-04
Loss = 6.5816e-02, PNorm = 66.4984, GNorm = 0.7441, lr_0 = 3.1947e-04
Loss = 7.2822e-02, PNorm = 66.5058, GNorm = 0.8028, lr_0 = 3.1925e-04
Loss = 7.4959e-02, PNorm = 66.5133, GNorm = 0.7878, lr_0 = 3.1904e-04
Loss = 7.4491e-02, PNorm = 66.5243, GNorm = 0.3778, lr_0 = 3.1882e-04
Loss = 6.6212e-02, PNorm = 66.5316, GNorm = 0.6052, lr_0 = 3.1860e-04
Loss = 6.8877e-02, PNorm = 66.5334, GNorm = 0.5373, lr_0 = 3.1838e-04
Loss = 6.0870e-02, PNorm = 66.5393, GNorm = 0.4876, lr_0 = 3.1816e-04
Loss = 6.4839e-02, PNorm = 66.5459, GNorm = 0.7986, lr_0 = 3.1794e-04
Loss = 6.3615e-02, PNorm = 66.5504, GNorm = 0.7194, lr_0 = 3.1773e-04
Loss = 5.9606e-02, PNorm = 66.5559, GNorm = 0.7636, lr_0 = 3.1751e-04
Loss = 6.1317e-02, PNorm = 66.5634, GNorm = 0.6765, lr_0 = 3.1729e-04
Loss = 5.6844e-02, PNorm = 66.5704, GNorm = 0.7670, lr_0 = 3.1707e-04
Loss = 7.0251e-02, PNorm = 66.5738, GNorm = 0.4582, lr_0 = 3.1686e-04
Loss = 7.9439e-02, PNorm = 66.5785, GNorm = 0.5339, lr_0 = 3.1664e-04
Loss = 6.5974e-02, PNorm = 66.5854, GNorm = 0.6492, lr_0 = 3.1642e-04
Loss = 5.8297e-02, PNorm = 66.5919, GNorm = 0.4779, lr_0 = 3.1621e-04
Validation mae = 0.385731
Epoch 16
Loss = 5.1481e-02, PNorm = 66.5985, GNorm = 0.5164, lr_0 = 3.1599e-04
Loss = 5.7001e-02, PNorm = 66.6061, GNorm = 0.8048, lr_0 = 3.1577e-04
Loss = 6.0092e-02, PNorm = 66.6134, GNorm = 0.4886, lr_0 = 3.1556e-04
Loss = 6.3248e-02, PNorm = 66.6199, GNorm = 0.8425, lr_0 = 3.1534e-04
Loss = 5.4876e-02, PNorm = 66.6265, GNorm = 0.7049, lr_0 = 3.1512e-04
Loss = 5.7071e-02, PNorm = 66.6340, GNorm = 0.5989, lr_0 = 3.1491e-04
Loss = 6.3590e-02, PNorm = 66.6444, GNorm = 0.5087, lr_0 = 3.1469e-04
Loss = 6.4089e-02, PNorm = 66.6490, GNorm = 0.4584, lr_0 = 3.1448e-04
Loss = 5.0784e-02, PNorm = 66.6552, GNorm = 0.6985, lr_0 = 3.1426e-04
Loss = 5.0006e-02, PNorm = 66.6612, GNorm = 0.5630, lr_0 = 3.1405e-04
Loss = 6.3777e-02, PNorm = 66.6700, GNorm = 0.3742, lr_0 = 3.1383e-04
Loss = 5.0979e-02, PNorm = 66.6750, GNorm = 0.4757, lr_0 = 3.1362e-04
Loss = 5.4623e-02, PNorm = 66.6779, GNorm = 0.5440, lr_0 = 3.1340e-04
Loss = 6.1118e-02, PNorm = 66.6844, GNorm = 0.5782, lr_0 = 3.1319e-04
Loss = 6.0858e-02, PNorm = 66.6913, GNorm = 0.5617, lr_0 = 3.1297e-04
Loss = 6.4292e-02, PNorm = 66.7000, GNorm = 0.5701, lr_0 = 3.1276e-04
Loss = 5.2913e-02, PNorm = 66.7062, GNorm = 0.5084, lr_0 = 3.1254e-04
Loss = 5.4719e-02, PNorm = 66.7085, GNorm = 0.6427, lr_0 = 3.1233e-04
Loss = 6.2094e-02, PNorm = 66.7124, GNorm = 0.6979, lr_0 = 3.1212e-04
Loss = 6.1309e-02, PNorm = 66.7185, GNorm = 0.9882, lr_0 = 3.1190e-04
Loss = 5.9783e-02, PNorm = 66.7240, GNorm = 0.6510, lr_0 = 3.1169e-04
Loss = 6.3443e-02, PNorm = 66.7276, GNorm = 0.4563, lr_0 = 3.1147e-04
Loss = 5.8193e-02, PNorm = 66.7354, GNorm = 0.5782, lr_0 = 3.1126e-04
Loss = 6.4609e-02, PNorm = 66.7397, GNorm = 0.4675, lr_0 = 3.1105e-04
Loss = 5.5950e-02, PNorm = 66.7446, GNorm = 0.6780, lr_0 = 3.1083e-04
Loss = 6.2437e-02, PNorm = 66.7497, GNorm = 0.5808, lr_0 = 3.1062e-04
Loss = 4.9129e-02, PNorm = 66.7577, GNorm = 0.4658, lr_0 = 3.1041e-04
Loss = 6.6672e-02, PNorm = 66.7629, GNorm = 0.5666, lr_0 = 3.1020e-04
Loss = 6.6649e-02, PNorm = 66.7704, GNorm = 0.8349, lr_0 = 3.0998e-04
Loss = 6.6403e-02, PNorm = 66.7776, GNorm = 0.7011, lr_0 = 3.0977e-04
Loss = 6.0908e-02, PNorm = 66.7819, GNorm = 0.6429, lr_0 = 3.0956e-04
Loss = 6.0403e-02, PNorm = 66.7846, GNorm = 1.0217, lr_0 = 3.0935e-04
Loss = 6.5386e-02, PNorm = 66.7918, GNorm = 1.0391, lr_0 = 3.0914e-04
Loss = 5.9848e-02, PNorm = 66.8005, GNorm = 0.5366, lr_0 = 3.0892e-04
Loss = 5.8305e-02, PNorm = 66.8100, GNorm = 0.5196, lr_0 = 3.0871e-04
Loss = 5.8841e-02, PNorm = 66.8184, GNorm = 0.6708, lr_0 = 3.0850e-04
Loss = 5.4877e-02, PNorm = 66.8237, GNorm = 0.6007, lr_0 = 3.0829e-04
Loss = 6.0790e-02, PNorm = 66.8289, GNorm = 0.9589, lr_0 = 3.0808e-04
Loss = 6.0721e-02, PNorm = 66.8330, GNorm = 0.6747, lr_0 = 3.0787e-04
Loss = 5.4257e-02, PNorm = 66.8375, GNorm = 0.9064, lr_0 = 3.0766e-04
Loss = 5.4748e-02, PNorm = 66.8437, GNorm = 0.5733, lr_0 = 3.0745e-04
Loss = 5.3242e-02, PNorm = 66.8487, GNorm = 0.4658, lr_0 = 3.0723e-04
Loss = 5.9704e-02, PNorm = 66.8536, GNorm = 0.6398, lr_0 = 3.0702e-04
Loss = 6.5646e-02, PNorm = 66.8594, GNorm = 0.5326, lr_0 = 3.0681e-04
Loss = 6.0525e-02, PNorm = 66.8661, GNorm = 0.6580, lr_0 = 3.0660e-04
Loss = 6.0482e-02, PNorm = 66.8750, GNorm = 0.6754, lr_0 = 3.0639e-04
Loss = 5.6323e-02, PNorm = 66.8819, GNorm = 0.4949, lr_0 = 3.0618e-04
Loss = 6.3193e-02, PNorm = 66.8879, GNorm = 0.9787, lr_0 = 3.0597e-04
Loss = 5.2084e-02, PNorm = 66.8925, GNorm = 0.6213, lr_0 = 3.0576e-04
Loss = 6.5618e-02, PNorm = 66.8967, GNorm = 0.8526, lr_0 = 3.0555e-04
Loss = 6.4139e-02, PNorm = 66.9027, GNorm = 0.5782, lr_0 = 3.0535e-04
Loss = 6.0208e-02, PNorm = 66.9105, GNorm = 0.4181, lr_0 = 3.0514e-04
Loss = 6.4919e-02, PNorm = 66.9168, GNorm = 0.4461, lr_0 = 3.0493e-04
Loss = 5.8031e-02, PNorm = 66.9170, GNorm = 0.7983, lr_0 = 3.0472e-04
Loss = 5.3373e-02, PNorm = 66.9208, GNorm = 0.5062, lr_0 = 3.0451e-04
Loss = 5.5699e-02, PNorm = 66.9254, GNorm = 0.6052, lr_0 = 3.0430e-04
Loss = 6.3006e-02, PNorm = 66.9327, GNorm = 0.7947, lr_0 = 3.0409e-04
Loss = 6.4486e-02, PNorm = 66.9417, GNorm = 0.4680, lr_0 = 3.0388e-04
Loss = 6.4258e-02, PNorm = 66.9501, GNorm = 0.9124, lr_0 = 3.0368e-04
Loss = 5.4162e-02, PNorm = 66.9579, GNorm = 0.5199, lr_0 = 3.0347e-04
Loss = 7.2586e-02, PNorm = 66.9634, GNorm = 0.6331, lr_0 = 3.0326e-04
Loss = 5.7759e-02, PNorm = 66.9719, GNorm = 0.4895, lr_0 = 3.0305e-04
Loss = 6.1173e-02, PNorm = 66.9792, GNorm = 0.5515, lr_0 = 3.0284e-04
Loss = 6.2662e-02, PNorm = 66.9847, GNorm = 0.7215, lr_0 = 3.0264e-04
Loss = 6.0480e-02, PNorm = 66.9885, GNorm = 0.6490, lr_0 = 3.0243e-04
Loss = 6.7092e-02, PNorm = 66.9967, GNorm = 0.5958, lr_0 = 3.0222e-04
Loss = 6.8048e-02, PNorm = 67.0024, GNorm = 0.4961, lr_0 = 3.0202e-04
Loss = 5.1117e-02, PNorm = 67.0117, GNorm = 0.4913, lr_0 = 3.0181e-04
Loss = 5.9683e-02, PNorm = 67.0186, GNorm = 0.4720, lr_0 = 3.0160e-04
Loss = 6.3440e-02, PNorm = 67.0230, GNorm = 0.5545, lr_0 = 3.0140e-04
Loss = 6.2875e-02, PNorm = 67.0268, GNorm = 0.5744, lr_0 = 3.0119e-04
Loss = 5.8652e-02, PNorm = 67.0305, GNorm = 0.5968, lr_0 = 3.0098e-04
Loss = 6.5587e-02, PNorm = 67.0365, GNorm = 0.5315, lr_0 = 3.0078e-04
Loss = 5.2617e-02, PNorm = 67.0434, GNorm = 0.5752, lr_0 = 3.0057e-04
Loss = 5.9485e-02, PNorm = 67.0506, GNorm = 0.5591, lr_0 = 3.0036e-04
Loss = 7.2857e-02, PNorm = 67.0536, GNorm = 0.6013, lr_0 = 3.0016e-04
Loss = 6.0090e-02, PNorm = 67.0572, GNorm = 0.6296, lr_0 = 2.9995e-04
Loss = 7.2368e-02, PNorm = 67.0613, GNorm = 0.4412, lr_0 = 2.9975e-04
Loss = 7.0294e-02, PNorm = 67.0687, GNorm = 1.1604, lr_0 = 2.9954e-04
Loss = 6.3983e-02, PNorm = 67.0781, GNorm = 0.6483, lr_0 = 2.9934e-04
Loss = 6.4795e-02, PNorm = 67.0865, GNorm = 0.6830, lr_0 = 2.9913e-04
Loss = 7.8991e-02, PNorm = 67.0937, GNorm = 0.5854, lr_0 = 2.9893e-04
Loss = 5.6635e-02, PNorm = 67.0993, GNorm = 0.4969, lr_0 = 2.9872e-04
Loss = 6.6112e-02, PNorm = 67.1067, GNorm = 0.5417, lr_0 = 2.9852e-04
Loss = 6.0139e-02, PNorm = 67.1138, GNorm = 0.4690, lr_0 = 2.9831e-04
Loss = 6.4703e-02, PNorm = 67.1176, GNorm = 0.7354, lr_0 = 2.9811e-04
Loss = 4.8325e-02, PNorm = 67.1236, GNorm = 0.5433, lr_0 = 2.9790e-04
Loss = 6.0336e-02, PNorm = 67.1282, GNorm = 0.4577, lr_0 = 2.9770e-04
Loss = 6.4136e-02, PNorm = 67.1337, GNorm = 0.9962, lr_0 = 2.9750e-04
Loss = 5.6857e-02, PNorm = 67.1421, GNorm = 0.4801, lr_0 = 2.9729e-04
Loss = 6.6780e-02, PNorm = 67.1493, GNorm = 0.5632, lr_0 = 2.9709e-04
Loss = 5.8253e-02, PNorm = 67.1569, GNorm = 0.5315, lr_0 = 2.9689e-04
Loss = 5.7434e-02, PNorm = 67.1606, GNorm = 0.7016, lr_0 = 2.9668e-04
Loss = 5.6614e-02, PNorm = 67.1636, GNorm = 0.7302, lr_0 = 2.9648e-04
Loss = 5.5373e-02, PNorm = 67.1692, GNorm = 0.5278, lr_0 = 2.9628e-04
Loss = 5.9810e-02, PNorm = 67.1740, GNorm = 0.7112, lr_0 = 2.9607e-04
Loss = 5.7620e-02, PNorm = 67.1792, GNorm = 0.5063, lr_0 = 2.9587e-04
Loss = 6.1611e-02, PNorm = 67.1851, GNorm = 0.4715, lr_0 = 2.9567e-04
Loss = 6.2301e-02, PNorm = 67.1918, GNorm = 0.8860, lr_0 = 2.9546e-04
Loss = 6.8113e-02, PNorm = 67.1982, GNorm = 0.7766, lr_0 = 2.9526e-04
Loss = 6.9816e-02, PNorm = 67.2041, GNorm = 0.4674, lr_0 = 2.9506e-04
Loss = 6.0497e-02, PNorm = 67.2106, GNorm = 0.4914, lr_0 = 2.9486e-04
Loss = 6.6409e-02, PNorm = 67.2159, GNorm = 0.7781, lr_0 = 2.9466e-04
Loss = 6.5538e-02, PNorm = 67.2210, GNorm = 0.4979, lr_0 = 2.9445e-04
Loss = 5.9102e-02, PNorm = 67.2237, GNorm = 0.7216, lr_0 = 2.9425e-04
Loss = 5.7883e-02, PNorm = 67.2288, GNorm = 0.4034, lr_0 = 2.9405e-04
Loss = 5.6967e-02, PNorm = 67.2342, GNorm = 0.5770, lr_0 = 2.9385e-04
Loss = 5.8392e-02, PNorm = 67.2400, GNorm = 0.4997, lr_0 = 2.9365e-04
Loss = 6.0473e-02, PNorm = 67.2457, GNorm = 0.5984, lr_0 = 2.9345e-04
Loss = 5.8314e-02, PNorm = 67.2481, GNorm = 0.6058, lr_0 = 2.9325e-04
Loss = 5.4317e-02, PNorm = 67.2529, GNorm = 0.5241, lr_0 = 2.9305e-04
Loss = 5.9153e-02, PNorm = 67.2577, GNorm = 0.7631, lr_0 = 2.9284e-04
Loss = 7.3596e-02, PNorm = 67.2632, GNorm = 0.8266, lr_0 = 2.9264e-04
Loss = 6.0745e-02, PNorm = 67.2664, GNorm = 0.5393, lr_0 = 2.9244e-04
Loss = 7.1896e-02, PNorm = 67.2671, GNorm = 0.5643, lr_0 = 2.9224e-04
Loss = 6.2969e-02, PNorm = 67.2694, GNorm = 0.7098, lr_0 = 2.9204e-04
Loss = 5.9922e-02, PNorm = 67.2731, GNorm = 0.5117, lr_0 = 2.9184e-04
Loss = 5.5891e-02, PNorm = 67.2796, GNorm = 0.6489, lr_0 = 2.9164e-04
Loss = 5.9602e-02, PNorm = 67.2850, GNorm = 0.4841, lr_0 = 2.9144e-04
Loss = 6.3048e-02, PNorm = 67.2899, GNorm = 0.5328, lr_0 = 2.9124e-04
Validation mae = 0.386500
Epoch 17
Loss = 5.2915e-02, PNorm = 67.2966, GNorm = 0.5470, lr_0 = 2.9104e-04
Loss = 5.4466e-02, PNorm = 67.3025, GNorm = 0.6038, lr_0 = 2.9084e-04
Loss = 4.7937e-02, PNorm = 67.3083, GNorm = 1.0789, lr_0 = 2.9065e-04
Loss = 4.7138e-02, PNorm = 67.3137, GNorm = 0.5114, lr_0 = 2.9045e-04
Loss = 4.8830e-02, PNorm = 67.3166, GNorm = 0.4882, lr_0 = 2.9025e-04
Loss = 5.7129e-02, PNorm = 67.3209, GNorm = 0.7584, lr_0 = 2.9005e-04
Loss = 5.5666e-02, PNorm = 67.3277, GNorm = 0.5113, lr_0 = 2.8985e-04
Loss = 4.7303e-02, PNorm = 67.3354, GNorm = 1.0347, lr_0 = 2.8965e-04
Loss = 4.8668e-02, PNorm = 67.3420, GNorm = 0.5868, lr_0 = 2.8945e-04
Loss = 4.9867e-02, PNorm = 67.3472, GNorm = 0.4643, lr_0 = 2.8925e-04
Loss = 5.2259e-02, PNorm = 67.3525, GNorm = 0.5735, lr_0 = 2.8906e-04
Loss = 4.9973e-02, PNorm = 67.3560, GNorm = 0.5148, lr_0 = 2.8886e-04
Loss = 5.9815e-02, PNorm = 67.3581, GNorm = 0.5545, lr_0 = 2.8866e-04
Loss = 6.5171e-02, PNorm = 67.3636, GNorm = 0.6006, lr_0 = 2.8846e-04
Loss = 6.0826e-02, PNorm = 67.3719, GNorm = 0.4280, lr_0 = 2.8826e-04
Loss = 5.5377e-02, PNorm = 67.3766, GNorm = 0.6612, lr_0 = 2.8807e-04
Loss = 5.0743e-02, PNorm = 67.3786, GNorm = 0.5441, lr_0 = 2.8787e-04
Loss = 5.1895e-02, PNorm = 67.3835, GNorm = 0.6355, lr_0 = 2.8767e-04
Loss = 5.5035e-02, PNorm = 67.3909, GNorm = 0.4440, lr_0 = 2.8748e-04
Loss = 5.1310e-02, PNorm = 67.3974, GNorm = 0.6049, lr_0 = 2.8728e-04
Loss = 6.1745e-02, PNorm = 67.4050, GNorm = 0.9341, lr_0 = 2.8708e-04
Loss = 5.1738e-02, PNorm = 67.4104, GNorm = 0.5674, lr_0 = 2.8689e-04
Loss = 5.1178e-02, PNorm = 67.4147, GNorm = 0.6631, lr_0 = 2.8669e-04
Loss = 5.3615e-02, PNorm = 67.4181, GNorm = 0.6079, lr_0 = 2.8649e-04
Loss = 4.7021e-02, PNorm = 67.4214, GNorm = 0.5031, lr_0 = 2.8630e-04
Loss = 6.0532e-02, PNorm = 67.4265, GNorm = 0.6331, lr_0 = 2.8610e-04
Loss = 5.5812e-02, PNorm = 67.4299, GNorm = 0.4012, lr_0 = 2.8590e-04
Loss = 6.2022e-02, PNorm = 67.4324, GNorm = 0.9553, lr_0 = 2.8571e-04
Loss = 6.0343e-02, PNorm = 67.4358, GNorm = 0.4334, lr_0 = 2.8551e-04
Loss = 5.7086e-02, PNorm = 67.4400, GNorm = 0.7356, lr_0 = 2.8532e-04
Loss = 6.0999e-02, PNorm = 67.4473, GNorm = 0.7971, lr_0 = 2.8512e-04
Loss = 5.7839e-02, PNorm = 67.4551, GNorm = 0.4813, lr_0 = 2.8493e-04
Loss = 5.7266e-02, PNorm = 67.4628, GNorm = 0.5729, lr_0 = 2.8473e-04
Loss = 4.8868e-02, PNorm = 67.4688, GNorm = 0.6642, lr_0 = 2.8454e-04
Loss = 6.0736e-02, PNorm = 67.4751, GNorm = 0.6281, lr_0 = 2.8434e-04
Loss = 5.6139e-02, PNorm = 67.4787, GNorm = 0.5556, lr_0 = 2.8415e-04
Loss = 6.0389e-02, PNorm = 67.4838, GNorm = 0.9669, lr_0 = 2.8395e-04
Loss = 5.9542e-02, PNorm = 67.4892, GNorm = 1.2807, lr_0 = 2.8376e-04
Loss = 5.8896e-02, PNorm = 67.4971, GNorm = 0.7061, lr_0 = 2.8356e-04
Loss = 6.0401e-02, PNorm = 67.5044, GNorm = 0.3375, lr_0 = 2.8337e-04
Loss = 5.2358e-02, PNorm = 67.5109, GNorm = 0.5526, lr_0 = 2.8317e-04
Loss = 5.4137e-02, PNorm = 67.5174, GNorm = 0.5659, lr_0 = 2.8298e-04
Loss = 6.3593e-02, PNorm = 67.5257, GNorm = 0.7491, lr_0 = 2.8279e-04
Loss = 5.1035e-02, PNorm = 67.5315, GNorm = 0.4585, lr_0 = 2.8259e-04
Loss = 4.6956e-02, PNorm = 67.5367, GNorm = 0.7901, lr_0 = 2.8240e-04
Loss = 5.4181e-02, PNorm = 67.5425, GNorm = 0.6012, lr_0 = 2.8221e-04
Loss = 5.7770e-02, PNorm = 67.5479, GNorm = 0.5101, lr_0 = 2.8201e-04
Loss = 5.0476e-02, PNorm = 67.5524, GNorm = 0.5396, lr_0 = 2.8182e-04
Loss = 5.5907e-02, PNorm = 67.5577, GNorm = 0.6977, lr_0 = 2.8163e-04
Loss = 5.7726e-02, PNorm = 67.5634, GNorm = 0.5684, lr_0 = 2.8143e-04
Loss = 5.2806e-02, PNorm = 67.5705, GNorm = 0.5923, lr_0 = 2.8124e-04
Loss = 6.3005e-02, PNorm = 67.5763, GNorm = 0.4566, lr_0 = 2.8105e-04
Loss = 4.9952e-02, PNorm = 67.5813, GNorm = 0.4010, lr_0 = 2.8085e-04
Loss = 5.7959e-02, PNorm = 67.5849, GNorm = 0.6206, lr_0 = 2.8066e-04
Loss = 4.8290e-02, PNorm = 67.5901, GNorm = 0.4105, lr_0 = 2.8047e-04
Loss = 5.3723e-02, PNorm = 67.5969, GNorm = 0.5030, lr_0 = 2.8028e-04
Loss = 6.4286e-02, PNorm = 67.6032, GNorm = 0.5685, lr_0 = 2.8009e-04
Loss = 5.2136e-02, PNorm = 67.6089, GNorm = 0.5483, lr_0 = 2.7989e-04
Loss = 5.3784e-02, PNorm = 67.6136, GNorm = 0.6864, lr_0 = 2.7970e-04
Loss = 5.0207e-02, PNorm = 67.6217, GNorm = 0.4780, lr_0 = 2.7951e-04
Loss = 6.7717e-02, PNorm = 67.6273, GNorm = 0.9313, lr_0 = 2.7932e-04
Loss = 6.3994e-02, PNorm = 67.6336, GNorm = 0.5102, lr_0 = 2.7913e-04
Loss = 6.6406e-02, PNorm = 67.6423, GNorm = 1.0532, lr_0 = 2.7894e-04
Loss = 6.4943e-02, PNorm = 67.6456, GNorm = 0.9006, lr_0 = 2.7875e-04
Loss = 6.0196e-02, PNorm = 67.6483, GNorm = 0.5555, lr_0 = 2.7855e-04
Loss = 5.1299e-02, PNorm = 67.6522, GNorm = 0.4206, lr_0 = 2.7836e-04
Loss = 5.3724e-02, PNorm = 67.6547, GNorm = 0.4521, lr_0 = 2.7817e-04
Loss = 6.7996e-02, PNorm = 67.6594, GNorm = 0.9913, lr_0 = 2.7798e-04
Loss = 5.7240e-02, PNorm = 67.6655, GNorm = 0.4568, lr_0 = 2.7779e-04
Loss = 5.9504e-02, PNorm = 67.6715, GNorm = 0.5046, lr_0 = 2.7760e-04
Loss = 5.0707e-02, PNorm = 67.6766, GNorm = 0.4239, lr_0 = 2.7741e-04
Loss = 5.6121e-02, PNorm = 67.6817, GNorm = 0.3885, lr_0 = 2.7722e-04
Loss = 5.6056e-02, PNorm = 67.6858, GNorm = 0.7405, lr_0 = 2.7703e-04
Loss = 6.3120e-02, PNorm = 67.6914, GNorm = 0.5078, lr_0 = 2.7684e-04
Loss = 6.8242e-02, PNorm = 67.6955, GNorm = 0.8059, lr_0 = 2.7665e-04
Loss = 5.3230e-02, PNorm = 67.6997, GNorm = 0.7893, lr_0 = 2.7646e-04
Loss = 5.8551e-02, PNorm = 67.7055, GNorm = 0.5973, lr_0 = 2.7627e-04
Loss = 5.6236e-02, PNorm = 67.7108, GNorm = 0.5105, lr_0 = 2.7608e-04
Loss = 5.3929e-02, PNorm = 67.7163, GNorm = 0.4724, lr_0 = 2.7590e-04
Loss = 6.4166e-02, PNorm = 67.7220, GNorm = 0.4257, lr_0 = 2.7571e-04
Loss = 6.3535e-02, PNorm = 67.7262, GNorm = 0.7675, lr_0 = 2.7552e-04
Loss = 5.7110e-02, PNorm = 67.7302, GNorm = 0.5548, lr_0 = 2.7533e-04
Loss = 5.7023e-02, PNorm = 67.7346, GNorm = 0.5652, lr_0 = 2.7514e-04
Loss = 5.7061e-02, PNorm = 67.7404, GNorm = 0.5513, lr_0 = 2.7495e-04
Loss = 5.7177e-02, PNorm = 67.7451, GNorm = 0.4804, lr_0 = 2.7476e-04
Loss = 5.2959e-02, PNorm = 67.7497, GNorm = 0.6891, lr_0 = 2.7457e-04
Loss = 5.7466e-02, PNorm = 67.7547, GNorm = 0.6918, lr_0 = 2.7439e-04
Loss = 5.5750e-02, PNorm = 67.7604, GNorm = 0.8474, lr_0 = 2.7420e-04
Loss = 6.1325e-02, PNorm = 67.7667, GNorm = 0.5302, lr_0 = 2.7401e-04
Loss = 5.7505e-02, PNorm = 67.7735, GNorm = 0.4819, lr_0 = 2.7382e-04
Loss = 5.5042e-02, PNorm = 67.7789, GNorm = 0.4402, lr_0 = 2.7364e-04
Loss = 5.5738e-02, PNorm = 67.7848, GNorm = 0.5660, lr_0 = 2.7345e-04
Loss = 5.8991e-02, PNorm = 67.7892, GNorm = 0.8761, lr_0 = 2.7326e-04
Loss = 5.2524e-02, PNorm = 67.7955, GNorm = 0.3850, lr_0 = 2.7307e-04
Loss = 5.2771e-02, PNorm = 67.8007, GNorm = 0.8358, lr_0 = 2.7289e-04
Loss = 5.8217e-02, PNorm = 67.8046, GNorm = 0.6116, lr_0 = 2.7270e-04
Loss = 6.1266e-02, PNorm = 67.8119, GNorm = 0.8616, lr_0 = 2.7251e-04
Loss = 6.1071e-02, PNorm = 67.8164, GNorm = 0.6034, lr_0 = 2.7233e-04
Loss = 5.1190e-02, PNorm = 67.8193, GNorm = 1.0176, lr_0 = 2.7214e-04
Loss = 5.7596e-02, PNorm = 67.8212, GNorm = 0.6795, lr_0 = 2.7195e-04
Loss = 6.8660e-02, PNorm = 67.8252, GNorm = 0.7266, lr_0 = 2.7177e-04
Loss = 5.7179e-02, PNorm = 67.8288, GNorm = 0.5840, lr_0 = 2.7158e-04
Loss = 5.9765e-02, PNorm = 67.8326, GNorm = 0.6860, lr_0 = 2.7139e-04
Loss = 5.9386e-02, PNorm = 67.8378, GNorm = 0.5992, lr_0 = 2.7121e-04
Loss = 5.7990e-02, PNorm = 67.8428, GNorm = 0.6725, lr_0 = 2.7102e-04
Loss = 5.0244e-02, PNorm = 67.8482, GNorm = 0.7948, lr_0 = 2.7084e-04
Loss = 6.5369e-02, PNorm = 67.8533, GNorm = 0.6752, lr_0 = 2.7065e-04
Loss = 5.3249e-02, PNorm = 67.8568, GNorm = 0.7693, lr_0 = 2.7047e-04
Loss = 5.4516e-02, PNorm = 67.8617, GNorm = 0.5011, lr_0 = 2.7028e-04
Loss = 5.5975e-02, PNorm = 67.8650, GNorm = 0.8849, lr_0 = 2.7010e-04
Loss = 5.8410e-02, PNorm = 67.8680, GNorm = 0.5220, lr_0 = 2.6991e-04
Loss = 6.2002e-02, PNorm = 67.8715, GNorm = 0.5957, lr_0 = 2.6973e-04
Loss = 5.8286e-02, PNorm = 67.8736, GNorm = 0.5376, lr_0 = 2.6954e-04
Loss = 5.5002e-02, PNorm = 67.8758, GNorm = 0.4243, lr_0 = 2.6936e-04
Loss = 5.7855e-02, PNorm = 67.8787, GNorm = 0.6183, lr_0 = 2.6917e-04
Loss = 5.9149e-02, PNorm = 67.8831, GNorm = 0.7207, lr_0 = 2.6899e-04
Loss = 5.7842e-02, PNorm = 67.8879, GNorm = 0.5387, lr_0 = 2.6880e-04
Loss = 6.5178e-02, PNorm = 67.8939, GNorm = 0.5134, lr_0 = 2.6862e-04
Loss = 5.6160e-02, PNorm = 67.9000, GNorm = 0.6544, lr_0 = 2.6844e-04
Loss = 6.8288e-02, PNorm = 67.9070, GNorm = 0.6262, lr_0 = 2.6825e-04
Validation mae = 0.389000
Epoch 18
Loss = 4.8560e-02, PNorm = 67.9144, GNorm = 0.5076, lr_0 = 2.6807e-04
Loss = 5.0776e-02, PNorm = 67.9196, GNorm = 0.6810, lr_0 = 2.6788e-04
Loss = 5.2929e-02, PNorm = 67.9269, GNorm = 0.4983, lr_0 = 2.6770e-04
Loss = 4.8594e-02, PNorm = 67.9302, GNorm = 0.4856, lr_0 = 2.6752e-04
Loss = 5.7856e-02, PNorm = 67.9325, GNorm = 0.4419, lr_0 = 2.6733e-04
Loss = 5.4032e-02, PNorm = 67.9355, GNorm = 0.5532, lr_0 = 2.6715e-04
Loss = 4.3893e-02, PNorm = 67.9408, GNorm = 0.6401, lr_0 = 2.6697e-04
Loss = 5.4430e-02, PNorm = 67.9451, GNorm = 0.9574, lr_0 = 2.6678e-04
Loss = 4.9976e-02, PNorm = 67.9513, GNorm = 0.3985, lr_0 = 2.6660e-04
Loss = 4.6306e-02, PNorm = 67.9567, GNorm = 0.8486, lr_0 = 2.6642e-04
Loss = 5.2190e-02, PNorm = 67.9614, GNorm = 0.6425, lr_0 = 2.6624e-04
Loss = 5.7332e-02, PNorm = 67.9661, GNorm = 0.5771, lr_0 = 2.6605e-04
Loss = 4.6547e-02, PNorm = 67.9713, GNorm = 0.4997, lr_0 = 2.6587e-04
Loss = 4.9882e-02, PNorm = 67.9785, GNorm = 0.5840, lr_0 = 2.6569e-04
Loss = 4.5533e-02, PNorm = 67.9832, GNorm = 0.7240, lr_0 = 2.6551e-04
Loss = 4.8468e-02, PNorm = 67.9878, GNorm = 0.5375, lr_0 = 2.6533e-04
Loss = 5.2183e-02, PNorm = 67.9928, GNorm = 0.5107, lr_0 = 2.6514e-04
Loss = 4.5060e-02, PNorm = 67.9970, GNorm = 0.5180, lr_0 = 2.6496e-04
Loss = 4.9927e-02, PNorm = 68.0014, GNorm = 0.5168, lr_0 = 2.6478e-04
Loss = 5.3492e-02, PNorm = 68.0064, GNorm = 0.5372, lr_0 = 2.6460e-04
Loss = 4.6316e-02, PNorm = 68.0106, GNorm = 0.7333, lr_0 = 2.6442e-04
Loss = 5.4211e-02, PNorm = 68.0179, GNorm = 0.7976, lr_0 = 2.6424e-04
Loss = 4.8327e-02, PNorm = 68.0237, GNorm = 0.6995, lr_0 = 2.6406e-04
Loss = 5.2923e-02, PNorm = 68.0272, GNorm = 0.5586, lr_0 = 2.6388e-04
Loss = 4.5375e-02, PNorm = 68.0313, GNorm = 0.4600, lr_0 = 2.6369e-04
Loss = 5.4037e-02, PNorm = 68.0363, GNorm = 0.9412, lr_0 = 2.6351e-04
Loss = 5.2928e-02, PNorm = 68.0397, GNorm = 0.8367, lr_0 = 2.6333e-04
Loss = 4.6851e-02, PNorm = 68.0437, GNorm = 0.3894, lr_0 = 2.6315e-04
Loss = 5.1481e-02, PNorm = 68.0468, GNorm = 0.4059, lr_0 = 2.6297e-04
Loss = 5.6199e-02, PNorm = 68.0514, GNorm = 0.9023, lr_0 = 2.6279e-04
Loss = 4.6715e-02, PNorm = 68.0556, GNorm = 0.4576, lr_0 = 2.6261e-04
Loss = 4.8264e-02, PNorm = 68.0604, GNorm = 0.4915, lr_0 = 2.6243e-04
Loss = 4.6908e-02, PNorm = 68.0656, GNorm = 0.5441, lr_0 = 2.6225e-04
Loss = 4.3842e-02, PNorm = 68.0722, GNorm = 0.6118, lr_0 = 2.6207e-04
Loss = 4.6103e-02, PNorm = 68.0766, GNorm = 0.7492, lr_0 = 2.6189e-04
Loss = 6.0925e-02, PNorm = 68.0775, GNorm = 0.4819, lr_0 = 2.6171e-04
Loss = 4.9692e-02, PNorm = 68.0828, GNorm = 0.3830, lr_0 = 2.6153e-04
Loss = 5.3253e-02, PNorm = 68.0899, GNorm = 0.6017, lr_0 = 2.6136e-04
Loss = 5.1950e-02, PNorm = 68.0949, GNorm = 0.4164, lr_0 = 2.6118e-04
Loss = 5.8775e-02, PNorm = 68.0996, GNorm = 0.4244, lr_0 = 2.6100e-04
Loss = 4.7196e-02, PNorm = 68.1042, GNorm = 0.5659, lr_0 = 2.6082e-04
Loss = 5.3605e-02, PNorm = 68.1088, GNorm = 0.4331, lr_0 = 2.6064e-04
Loss = 4.9517e-02, PNorm = 68.1123, GNorm = 0.6526, lr_0 = 2.6046e-04
Loss = 5.4722e-02, PNorm = 68.1165, GNorm = 0.5160, lr_0 = 2.6028e-04
Loss = 5.3359e-02, PNorm = 68.1211, GNorm = 0.4836, lr_0 = 2.6011e-04
Loss = 6.4075e-02, PNorm = 68.1279, GNorm = 0.5751, lr_0 = 2.5993e-04
Loss = 6.0302e-02, PNorm = 68.1356, GNorm = 0.4399, lr_0 = 2.5975e-04
Loss = 5.1885e-02, PNorm = 68.1435, GNorm = 0.3869, lr_0 = 2.5957e-04
Loss = 5.4383e-02, PNorm = 68.1500, GNorm = 0.5909, lr_0 = 2.5939e-04
Loss = 6.3346e-02, PNorm = 68.1539, GNorm = 0.4523, lr_0 = 2.5922e-04
Loss = 5.6866e-02, PNorm = 68.1583, GNorm = 0.5710, lr_0 = 2.5904e-04
Loss = 5.1859e-02, PNorm = 68.1630, GNorm = 0.4422, lr_0 = 2.5886e-04
Loss = 6.1796e-02, PNorm = 68.1668, GNorm = 0.4739, lr_0 = 2.5868e-04
Loss = 5.0291e-02, PNorm = 68.1707, GNorm = 0.5963, lr_0 = 2.5851e-04
Loss = 5.3104e-02, PNorm = 68.1756, GNorm = 0.7875, lr_0 = 2.5833e-04
Loss = 5.2905e-02, PNorm = 68.1825, GNorm = 0.7708, lr_0 = 2.5815e-04
Loss = 5.9086e-02, PNorm = 68.1855, GNorm = 0.5879, lr_0 = 2.5797e-04
Loss = 5.7976e-02, PNorm = 68.1876, GNorm = 0.8886, lr_0 = 2.5780e-04
Loss = 6.5837e-02, PNorm = 68.1931, GNorm = 0.7819, lr_0 = 2.5762e-04
Loss = 5.4312e-02, PNorm = 68.1974, GNorm = 0.6465, lr_0 = 2.5745e-04
Loss = 5.0055e-02, PNorm = 68.2037, GNorm = 0.4401, lr_0 = 2.5727e-04
Loss = 5.0152e-02, PNorm = 68.2094, GNorm = 0.4702, lr_0 = 2.5709e-04
Loss = 5.1050e-02, PNorm = 68.2147, GNorm = 0.3698, lr_0 = 2.5692e-04
Loss = 5.0898e-02, PNorm = 68.2195, GNorm = 0.4443, lr_0 = 2.5674e-04
Loss = 4.6499e-02, PNorm = 68.2240, GNorm = 0.4764, lr_0 = 2.5656e-04
Loss = 5.1951e-02, PNorm = 68.2294, GNorm = 0.4538, lr_0 = 2.5639e-04
Loss = 5.0443e-02, PNorm = 68.2347, GNorm = 0.4337, lr_0 = 2.5621e-04
Loss = 5.3359e-02, PNorm = 68.2387, GNorm = 0.6876, lr_0 = 2.5604e-04
Loss = 5.9544e-02, PNorm = 68.2440, GNorm = 0.6993, lr_0 = 2.5586e-04
Loss = 5.1629e-02, PNorm = 68.2490, GNorm = 0.4673, lr_0 = 2.5569e-04
Loss = 5.1527e-02, PNorm = 68.2530, GNorm = 0.5128, lr_0 = 2.5551e-04
Loss = 5.7094e-02, PNorm = 68.2561, GNorm = 0.8300, lr_0 = 2.5534e-04
Loss = 5.3025e-02, PNorm = 68.2601, GNorm = 0.5826, lr_0 = 2.5516e-04
Loss = 4.7328e-02, PNorm = 68.2653, GNorm = 0.4608, lr_0 = 2.5499e-04
Loss = 4.4602e-02, PNorm = 68.2680, GNorm = 0.3741, lr_0 = 2.5481e-04
Loss = 5.4127e-02, PNorm = 68.2722, GNorm = 0.4824, lr_0 = 2.5464e-04
Loss = 4.2171e-02, PNorm = 68.2770, GNorm = 0.5141, lr_0 = 2.5446e-04
Loss = 5.0618e-02, PNorm = 68.2800, GNorm = 0.5662, lr_0 = 2.5429e-04
Loss = 5.6647e-02, PNorm = 68.2830, GNorm = 1.1912, lr_0 = 2.5411e-04
Loss = 5.2845e-02, PNorm = 68.2884, GNorm = 0.5083, lr_0 = 2.5394e-04
Loss = 6.3516e-02, PNorm = 68.2949, GNorm = 0.6238, lr_0 = 2.5377e-04
Loss = 5.3633e-02, PNorm = 68.3007, GNorm = 0.5629, lr_0 = 2.5359e-04
Loss = 5.1996e-02, PNorm = 68.3046, GNorm = 0.4546, lr_0 = 2.5342e-04
Loss = 5.3846e-02, PNorm = 68.3084, GNorm = 0.7549, lr_0 = 2.5325e-04
Loss = 6.1899e-02, PNorm = 68.3114, GNorm = 0.8298, lr_0 = 2.5307e-04
Loss = 6.2690e-02, PNorm = 68.3150, GNorm = 0.5120, lr_0 = 2.5290e-04
Loss = 5.6004e-02, PNorm = 68.3213, GNorm = 0.6364, lr_0 = 2.5273e-04
Loss = 5.6808e-02, PNorm = 68.3262, GNorm = 0.8418, lr_0 = 2.5255e-04
Loss = 5.7681e-02, PNorm = 68.3318, GNorm = 0.6042, lr_0 = 2.5238e-04
Loss = 5.3210e-02, PNorm = 68.3366, GNorm = 0.7150, lr_0 = 2.5221e-04
Loss = 5.1031e-02, PNorm = 68.3413, GNorm = 0.4571, lr_0 = 2.5203e-04
Loss = 6.0957e-02, PNorm = 68.3440, GNorm = 0.5043, lr_0 = 2.5186e-04
Loss = 5.6117e-02, PNorm = 68.3467, GNorm = 0.6619, lr_0 = 2.5169e-04
Loss = 5.0974e-02, PNorm = 68.3498, GNorm = 0.4544, lr_0 = 2.5152e-04
Loss = 5.4482e-02, PNorm = 68.3522, GNorm = 0.4829, lr_0 = 2.5134e-04
Loss = 5.5774e-02, PNorm = 68.3581, GNorm = 0.6870, lr_0 = 2.5117e-04
Loss = 5.1153e-02, PNorm = 68.3642, GNorm = 0.4924, lr_0 = 2.5100e-04
Loss = 6.0652e-02, PNorm = 68.3709, GNorm = 0.5250, lr_0 = 2.5083e-04
Loss = 4.4520e-02, PNorm = 68.3764, GNorm = 0.5355, lr_0 = 2.5066e-04
Loss = 5.9503e-02, PNorm = 68.3811, GNorm = 1.1799, lr_0 = 2.5048e-04
Loss = 5.3317e-02, PNorm = 68.3850, GNorm = 0.4535, lr_0 = 2.5031e-04
Loss = 5.4798e-02, PNorm = 68.3872, GNorm = 0.5538, lr_0 = 2.5014e-04
Loss = 5.2961e-02, PNorm = 68.3921, GNorm = 0.3774, lr_0 = 2.4997e-04
Loss = 5.4395e-02, PNorm = 68.3965, GNorm = 0.8961, lr_0 = 2.4980e-04
Loss = 5.8642e-02, PNorm = 68.4025, GNorm = 0.6341, lr_0 = 2.4963e-04
Loss = 4.9707e-02, PNorm = 68.4065, GNorm = 0.6180, lr_0 = 2.4946e-04
Loss = 5.2523e-02, PNorm = 68.4106, GNorm = 0.7841, lr_0 = 2.4929e-04
Loss = 5.6216e-02, PNorm = 68.4157, GNorm = 0.4387, lr_0 = 2.4911e-04
Loss = 6.1341e-02, PNorm = 68.4216, GNorm = 0.7750, lr_0 = 2.4894e-04
Loss = 5.2415e-02, PNorm = 68.4269, GNorm = 0.6278, lr_0 = 2.4877e-04
Loss = 5.7550e-02, PNorm = 68.4318, GNorm = 0.6392, lr_0 = 2.4860e-04
Loss = 5.5603e-02, PNorm = 68.4352, GNorm = 0.4591, lr_0 = 2.4843e-04
Loss = 5.3377e-02, PNorm = 68.4392, GNorm = 0.8322, lr_0 = 2.4826e-04
Loss = 6.3524e-02, PNorm = 68.4399, GNorm = 0.5216, lr_0 = 2.4809e-04
Loss = 6.2285e-02, PNorm = 68.4433, GNorm = 0.7327, lr_0 = 2.4792e-04
Loss = 5.5197e-02, PNorm = 68.4476, GNorm = 0.7030, lr_0 = 2.4775e-04
Loss = 5.4917e-02, PNorm = 68.4520, GNorm = 0.8551, lr_0 = 2.4758e-04
Loss = 5.4931e-02, PNorm = 68.4541, GNorm = 0.9513, lr_0 = 2.4741e-04
Loss = 4.9287e-02, PNorm = 68.4576, GNorm = 0.7466, lr_0 = 2.4724e-04
Loss = 5.0729e-02, PNorm = 68.4602, GNorm = 0.5201, lr_0 = 2.4707e-04
Validation mae = 0.383367
Epoch 19
Loss = 5.6040e-02, PNorm = 68.4645, GNorm = 0.6755, lr_0 = 2.4690e-04
Loss = 5.0772e-02, PNorm = 68.4700, GNorm = 0.4209, lr_0 = 2.4674e-04
Loss = 4.7682e-02, PNorm = 68.4738, GNorm = 0.6637, lr_0 = 2.4657e-04
Loss = 5.0174e-02, PNorm = 68.4794, GNorm = 0.3786, lr_0 = 2.4640e-04
Loss = 4.1297e-02, PNorm = 68.4850, GNorm = 0.5611, lr_0 = 2.4623e-04
Loss = 4.9263e-02, PNorm = 68.4889, GNorm = 0.5854, lr_0 = 2.4606e-04
Loss = 5.3308e-02, PNorm = 68.4942, GNorm = 0.7960, lr_0 = 2.4589e-04
Loss = 4.1612e-02, PNorm = 68.4986, GNorm = 0.3706, lr_0 = 2.4572e-04
Loss = 4.2426e-02, PNorm = 68.5034, GNorm = 0.7096, lr_0 = 2.4556e-04
Loss = 4.1790e-02, PNorm = 68.5079, GNorm = 0.4107, lr_0 = 2.4539e-04
Loss = 6.0718e-02, PNorm = 68.5136, GNorm = 0.6639, lr_0 = 2.4522e-04
Loss = 5.1557e-02, PNorm = 68.5193, GNorm = 0.4841, lr_0 = 2.4505e-04
Loss = 4.9614e-02, PNorm = 68.5242, GNorm = 0.5509, lr_0 = 2.4488e-04
Loss = 4.7657e-02, PNorm = 68.5289, GNorm = 0.7361, lr_0 = 2.4472e-04
Loss = 4.6456e-02, PNorm = 68.5316, GNorm = 0.8785, lr_0 = 2.4455e-04
Loss = 4.3872e-02, PNorm = 68.5359, GNorm = 0.4381, lr_0 = 2.4438e-04
Loss = 4.7470e-02, PNorm = 68.5419, GNorm = 0.6463, lr_0 = 2.4421e-04
Loss = 5.2143e-02, PNorm = 68.5468, GNorm = 0.6107, lr_0 = 2.4405e-04
Loss = 5.0985e-02, PNorm = 68.5516, GNorm = 0.5317, lr_0 = 2.4388e-04
Loss = 5.0383e-02, PNorm = 68.5560, GNorm = 0.4518, lr_0 = 2.4371e-04
Loss = 5.2027e-02, PNorm = 68.5595, GNorm = 0.7110, lr_0 = 2.4354e-04
Loss = 4.3341e-02, PNorm = 68.5662, GNorm = 0.4521, lr_0 = 2.4338e-04
Loss = 4.0260e-02, PNorm = 68.5712, GNorm = 0.5731, lr_0 = 2.4321e-04
Loss = 4.7848e-02, PNorm = 68.5749, GNorm = 0.4735, lr_0 = 2.4304e-04
Loss = 5.2369e-02, PNorm = 68.5786, GNorm = 0.4453, lr_0 = 2.4288e-04
Loss = 5.0548e-02, PNorm = 68.5823, GNorm = 0.3757, lr_0 = 2.4271e-04
Loss = 4.7394e-02, PNorm = 68.5878, GNorm = 0.6103, lr_0 = 2.4254e-04
Loss = 5.2797e-02, PNorm = 68.5916, GNorm = 0.7306, lr_0 = 2.4238e-04
Loss = 4.8200e-02, PNorm = 68.5951, GNorm = 0.7903, lr_0 = 2.4221e-04
Loss = 5.0160e-02, PNorm = 68.6011, GNorm = 0.8271, lr_0 = 2.4205e-04
Loss = 4.2208e-02, PNorm = 68.6058, GNorm = 0.4433, lr_0 = 2.4188e-04
Loss = 4.8460e-02, PNorm = 68.6097, GNorm = 0.5858, lr_0 = 2.4171e-04
Loss = 4.9743e-02, PNorm = 68.6131, GNorm = 0.4541, lr_0 = 2.4155e-04
Loss = 5.5692e-02, PNorm = 68.6178, GNorm = 0.4513, lr_0 = 2.4138e-04
Loss = 4.7687e-02, PNorm = 68.6251, GNorm = 0.6662, lr_0 = 2.4122e-04
Loss = 6.0274e-02, PNorm = 68.6315, GNorm = 0.4830, lr_0 = 2.4105e-04
Loss = 5.4106e-02, PNorm = 68.6374, GNorm = 0.7378, lr_0 = 2.4089e-04
Loss = 5.0736e-02, PNorm = 68.6416, GNorm = 0.5817, lr_0 = 2.4072e-04
Loss = 5.4794e-02, PNorm = 68.6441, GNorm = 0.5107, lr_0 = 2.4056e-04
Loss = 4.8221e-02, PNorm = 68.6467, GNorm = 0.4155, lr_0 = 2.4039e-04
Loss = 4.9532e-02, PNorm = 68.6512, GNorm = 0.5236, lr_0 = 2.4023e-04
Loss = 5.0398e-02, PNorm = 68.6556, GNorm = 0.5435, lr_0 = 2.4006e-04
Loss = 4.9196e-02, PNorm = 68.6604, GNorm = 0.4230, lr_0 = 2.3990e-04
Loss = 5.5735e-02, PNorm = 68.6648, GNorm = 0.3730, lr_0 = 2.3974e-04
Loss = 5.1491e-02, PNorm = 68.6713, GNorm = 0.5576, lr_0 = 2.3957e-04
Loss = 5.2708e-02, PNorm = 68.6754, GNorm = 0.5037, lr_0 = 2.3941e-04
Loss = 4.9997e-02, PNorm = 68.6778, GNorm = 0.9768, lr_0 = 2.3924e-04
Loss = 4.8708e-02, PNorm = 68.6830, GNorm = 0.7491, lr_0 = 2.3908e-04
Loss = 4.4520e-02, PNorm = 68.6875, GNorm = 0.6225, lr_0 = 2.3892e-04
Loss = 4.0680e-02, PNorm = 68.6904, GNorm = 0.7073, lr_0 = 2.3875e-04
Loss = 4.1765e-02, PNorm = 68.6930, GNorm = 0.8376, lr_0 = 2.3859e-04
Loss = 5.2615e-02, PNorm = 68.6972, GNorm = 0.6610, lr_0 = 2.3842e-04
Loss = 4.8252e-02, PNorm = 68.7013, GNorm = 0.8053, lr_0 = 2.3826e-04
Loss = 4.8050e-02, PNorm = 68.7037, GNorm = 0.5009, lr_0 = 2.3810e-04
Loss = 4.8783e-02, PNorm = 68.7064, GNorm = 0.4911, lr_0 = 2.3794e-04
Loss = 5.5973e-02, PNorm = 68.7104, GNorm = 0.9538, lr_0 = 2.3777e-04
Loss = 4.5618e-02, PNorm = 68.7149, GNorm = 0.6337, lr_0 = 2.3761e-04
Loss = 4.5669e-02, PNorm = 68.7189, GNorm = 0.3919, lr_0 = 2.3745e-04
Loss = 4.4923e-02, PNorm = 68.7224, GNorm = 0.3687, lr_0 = 2.3728e-04
Loss = 4.5606e-02, PNorm = 68.7251, GNorm = 0.4544, lr_0 = 2.3712e-04
Loss = 5.0207e-02, PNorm = 68.7283, GNorm = 0.4327, lr_0 = 2.3696e-04
Loss = 5.7336e-02, PNorm = 68.7321, GNorm = 0.5442, lr_0 = 2.3680e-04
Loss = 4.9643e-02, PNorm = 68.7372, GNorm = 0.4560, lr_0 = 2.3663e-04
Loss = 4.3084e-02, PNorm = 68.7429, GNorm = 0.5311, lr_0 = 2.3647e-04
Loss = 4.7909e-02, PNorm = 68.7483, GNorm = 0.5098, lr_0 = 2.3631e-04
Loss = 4.6513e-02, PNorm = 68.7504, GNorm = 0.4601, lr_0 = 2.3615e-04
Loss = 5.0962e-02, PNorm = 68.7534, GNorm = 0.5218, lr_0 = 2.3599e-04
Loss = 5.0706e-02, PNorm = 68.7573, GNorm = 0.4841, lr_0 = 2.3582e-04
Loss = 5.3523e-02, PNorm = 68.7588, GNorm = 0.6279, lr_0 = 2.3566e-04
Loss = 5.0934e-02, PNorm = 68.7620, GNorm = 0.4896, lr_0 = 2.3550e-04
Loss = 4.9415e-02, PNorm = 68.7685, GNorm = 0.5696, lr_0 = 2.3534e-04
Loss = 5.2680e-02, PNorm = 68.7738, GNorm = 0.5572, lr_0 = 2.3518e-04
Loss = 4.3444e-02, PNorm = 68.7777, GNorm = 0.4287, lr_0 = 2.3502e-04
Loss = 5.1464e-02, PNorm = 68.7798, GNorm = 0.6057, lr_0 = 2.3486e-04
Loss = 4.8415e-02, PNorm = 68.7834, GNorm = 0.4711, lr_0 = 2.3470e-04
Loss = 5.0630e-02, PNorm = 68.7879, GNorm = 0.5695, lr_0 = 2.3454e-04
Loss = 4.2786e-02, PNorm = 68.7921, GNorm = 0.4333, lr_0 = 2.3437e-04
Loss = 4.5004e-02, PNorm = 68.7954, GNorm = 0.5024, lr_0 = 2.3421e-04
Loss = 4.9125e-02, PNorm = 68.7989, GNorm = 0.5940, lr_0 = 2.3405e-04
Loss = 5.2167e-02, PNorm = 68.8024, GNorm = 0.7085, lr_0 = 2.3389e-04
Loss = 5.1414e-02, PNorm = 68.8042, GNorm = 0.3901, lr_0 = 2.3373e-04
Loss = 5.4212e-02, PNorm = 68.8088, GNorm = 0.4624, lr_0 = 2.3357e-04
Loss = 4.6606e-02, PNorm = 68.8144, GNorm = 0.4837, lr_0 = 2.3341e-04
Loss = 5.2750e-02, PNorm = 68.8175, GNorm = 0.6175, lr_0 = 2.3325e-04
Loss = 5.3723e-02, PNorm = 68.8208, GNorm = 0.6648, lr_0 = 2.3309e-04
Loss = 4.9205e-02, PNorm = 68.8265, GNorm = 0.4871, lr_0 = 2.3293e-04
Loss = 3.9137e-02, PNorm = 68.8311, GNorm = 0.6209, lr_0 = 2.3277e-04
Loss = 5.0152e-02, PNorm = 68.8355, GNorm = 0.5284, lr_0 = 2.3261e-04
Loss = 6.2815e-02, PNorm = 68.8387, GNorm = 0.4223, lr_0 = 2.3246e-04
Loss = 4.2088e-02, PNorm = 68.8415, GNorm = 0.4927, lr_0 = 2.3230e-04
Loss = 5.3464e-02, PNorm = 68.8433, GNorm = 0.6018, lr_0 = 2.3214e-04
Loss = 4.6272e-02, PNorm = 68.8465, GNorm = 0.5258, lr_0 = 2.3198e-04
Loss = 4.6115e-02, PNorm = 68.8505, GNorm = 0.6084, lr_0 = 2.3182e-04
Loss = 5.5250e-02, PNorm = 68.8535, GNorm = 0.6391, lr_0 = 2.3166e-04
Loss = 4.9812e-02, PNorm = 68.8576, GNorm = 0.7136, lr_0 = 2.3150e-04
Loss = 5.0683e-02, PNorm = 68.8618, GNorm = 0.4859, lr_0 = 2.3134e-04
Loss = 5.3341e-02, PNorm = 68.8643, GNorm = 1.0296, lr_0 = 2.3118e-04
Loss = 5.2908e-02, PNorm = 68.8676, GNorm = 0.4437, lr_0 = 2.3103e-04
Loss = 5.6295e-02, PNorm = 68.8710, GNorm = 0.6623, lr_0 = 2.3087e-04
Loss = 4.6659e-02, PNorm = 68.8738, GNorm = 0.4401, lr_0 = 2.3071e-04
Loss = 5.2970e-02, PNorm = 68.8784, GNorm = 0.8085, lr_0 = 2.3055e-04
Loss = 5.5012e-02, PNorm = 68.8839, GNorm = 0.5250, lr_0 = 2.3039e-04
Loss = 5.2728e-02, PNorm = 68.8889, GNorm = 0.5741, lr_0 = 2.3024e-04
Loss = 4.7742e-02, PNorm = 68.8945, GNorm = 0.5878, lr_0 = 2.3008e-04
Loss = 5.3229e-02, PNorm = 68.8989, GNorm = 0.5849, lr_0 = 2.2992e-04
Loss = 4.5653e-02, PNorm = 68.9008, GNorm = 0.4993, lr_0 = 2.2976e-04
Loss = 4.8193e-02, PNorm = 68.9015, GNorm = 0.4558, lr_0 = 2.2961e-04
Loss = 5.1230e-02, PNorm = 68.9054, GNorm = 0.6038, lr_0 = 2.2945e-04
Loss = 5.7869e-02, PNorm = 68.9083, GNorm = 0.5527, lr_0 = 2.2929e-04
Loss = 5.7717e-02, PNorm = 68.9129, GNorm = 0.4158, lr_0 = 2.2913e-04
Loss = 5.4625e-02, PNorm = 68.9157, GNorm = 0.5293, lr_0 = 2.2898e-04
Loss = 5.1783e-02, PNorm = 68.9193, GNorm = 0.5846, lr_0 = 2.2882e-04
Loss = 5.1235e-02, PNorm = 68.9240, GNorm = 0.5594, lr_0 = 2.2866e-04
Loss = 4.7894e-02, PNorm = 68.9267, GNorm = 0.5373, lr_0 = 2.2851e-04
Loss = 5.5399e-02, PNorm = 68.9286, GNorm = 0.4639, lr_0 = 2.2835e-04
Loss = 6.0949e-02, PNorm = 68.9334, GNorm = 0.5544, lr_0 = 2.2819e-04
Loss = 5.6682e-02, PNorm = 68.9384, GNorm = 0.5373, lr_0 = 2.2804e-04
Loss = 4.7336e-02, PNorm = 68.9427, GNorm = 0.6696, lr_0 = 2.2788e-04
Loss = 4.6478e-02, PNorm = 68.9466, GNorm = 0.6089, lr_0 = 2.2773e-04
Loss = 5.1174e-02, PNorm = 68.9512, GNorm = 0.4256, lr_0 = 2.2757e-04
Validation mae = 0.386410
Epoch 20
Loss = 4.6396e-02, PNorm = 68.9551, GNorm = 0.7578, lr_0 = 2.2741e-04
Loss = 4.9784e-02, PNorm = 68.9580, GNorm = 0.5950, lr_0 = 2.2726e-04
Loss = 4.6562e-02, PNorm = 68.9611, GNorm = 0.8784, lr_0 = 2.2710e-04
Loss = 4.4460e-02, PNorm = 68.9656, GNorm = 0.6178, lr_0 = 2.2695e-04
Loss = 4.5351e-02, PNorm = 68.9687, GNorm = 0.3047, lr_0 = 2.2679e-04
Loss = 4.8644e-02, PNorm = 68.9724, GNorm = 0.6812, lr_0 = 2.2664e-04
Loss = 4.2903e-02, PNorm = 68.9761, GNorm = 0.5474, lr_0 = 2.2648e-04
Loss = 5.2006e-02, PNorm = 68.9830, GNorm = 0.6751, lr_0 = 2.2632e-04
Loss = 3.8794e-02, PNorm = 68.9888, GNorm = 0.4362, lr_0 = 2.2617e-04
Loss = 4.2017e-02, PNorm = 68.9922, GNorm = 0.7386, lr_0 = 2.2601e-04
Loss = 4.0871e-02, PNorm = 68.9949, GNorm = 0.6488, lr_0 = 2.2586e-04
Loss = 4.2881e-02, PNorm = 68.9967, GNorm = 0.5514, lr_0 = 2.2571e-04
Loss = 4.1897e-02, PNorm = 68.9998, GNorm = 0.6050, lr_0 = 2.2555e-04
Loss = 3.8834e-02, PNorm = 69.0049, GNorm = 0.5857, lr_0 = 2.2540e-04
Loss = 4.4236e-02, PNorm = 69.0104, GNorm = 0.6496, lr_0 = 2.2524e-04
Loss = 4.0173e-02, PNorm = 69.0135, GNorm = 0.4022, lr_0 = 2.2509e-04
Loss = 4.7749e-02, PNorm = 69.0180, GNorm = 0.7958, lr_0 = 2.2493e-04
Loss = 4.0572e-02, PNorm = 69.0203, GNorm = 0.5509, lr_0 = 2.2478e-04
Loss = 4.7859e-02, PNorm = 69.0228, GNorm = 0.4448, lr_0 = 2.2463e-04
Loss = 4.0221e-02, PNorm = 69.0272, GNorm = 0.3887, lr_0 = 2.2447e-04
Loss = 4.0342e-02, PNorm = 69.0314, GNorm = 0.5084, lr_0 = 2.2432e-04
Loss = 5.0998e-02, PNorm = 69.0354, GNorm = 0.5202, lr_0 = 2.2416e-04
Loss = 4.6049e-02, PNorm = 69.0396, GNorm = 0.5409, lr_0 = 2.2401e-04
Loss = 4.6110e-02, PNorm = 69.0443, GNorm = 0.6041, lr_0 = 2.2386e-04
Loss = 4.0437e-02, PNorm = 69.0464, GNorm = 0.6187, lr_0 = 2.2370e-04
Loss = 4.9318e-02, PNorm = 69.0501, GNorm = 0.5316, lr_0 = 2.2355e-04
Loss = 4.9119e-02, PNorm = 69.0551, GNorm = 0.4921, lr_0 = 2.2340e-04
Loss = 4.8094e-02, PNorm = 69.0619, GNorm = 0.4413, lr_0 = 2.2324e-04
Loss = 4.7449e-02, PNorm = 69.0671, GNorm = 0.4434, lr_0 = 2.2309e-04
Loss = 4.4707e-02, PNorm = 69.0708, GNorm = 0.4303, lr_0 = 2.2294e-04
Loss = 4.8760e-02, PNorm = 69.0743, GNorm = 0.5072, lr_0 = 2.2279e-04
Loss = 4.2380e-02, PNorm = 69.0772, GNorm = 0.5285, lr_0 = 2.2263e-04
Loss = 4.6690e-02, PNorm = 69.0817, GNorm = 0.6357, lr_0 = 2.2248e-04
Loss = 4.8828e-02, PNorm = 69.0849, GNorm = 0.8376, lr_0 = 2.2233e-04
Loss = 4.3941e-02, PNorm = 69.0875, GNorm = 0.5607, lr_0 = 2.2218e-04
Loss = 3.7304e-02, PNorm = 69.0901, GNorm = 0.4831, lr_0 = 2.2202e-04
Loss = 5.2171e-02, PNorm = 69.0923, GNorm = 0.6517, lr_0 = 2.2187e-04
Loss = 4.3823e-02, PNorm = 69.0968, GNorm = 0.4238, lr_0 = 2.2172e-04
Loss = 4.5855e-02, PNorm = 69.1015, GNorm = 0.5957, lr_0 = 2.2157e-04
Loss = 4.7522e-02, PNorm = 69.1051, GNorm = 0.4945, lr_0 = 2.2142e-04
Loss = 3.6742e-02, PNorm = 69.1079, GNorm = 0.4554, lr_0 = 2.2126e-04
Loss = 4.9297e-02, PNorm = 69.1110, GNorm = 0.5329, lr_0 = 2.2111e-04
Loss = 4.1118e-02, PNorm = 69.1159, GNorm = 0.5764, lr_0 = 2.2096e-04
Loss = 4.2492e-02, PNorm = 69.1216, GNorm = 0.4706, lr_0 = 2.2081e-04
Loss = 3.8402e-02, PNorm = 69.1252, GNorm = 0.3596, lr_0 = 2.2066e-04
Loss = 4.2973e-02, PNorm = 69.1287, GNorm = 0.4146, lr_0 = 2.2051e-04
Loss = 4.6809e-02, PNorm = 69.1319, GNorm = 0.5283, lr_0 = 2.2036e-04
Loss = 5.2140e-02, PNorm = 69.1350, GNorm = 0.6445, lr_0 = 2.2021e-04
Loss = 5.1654e-02, PNorm = 69.1394, GNorm = 0.5914, lr_0 = 2.2005e-04
Loss = 4.8083e-02, PNorm = 69.1438, GNorm = 0.4501, lr_0 = 2.1990e-04
Loss = 5.0731e-02, PNorm = 69.1493, GNorm = 0.5362, lr_0 = 2.1975e-04
Loss = 5.6872e-02, PNorm = 69.1533, GNorm = 0.5081, lr_0 = 2.1960e-04
Loss = 4.3485e-02, PNorm = 69.1557, GNorm = 0.5145, lr_0 = 2.1945e-04
Loss = 3.8624e-02, PNorm = 69.1588, GNorm = 0.4719, lr_0 = 2.1930e-04
Loss = 4.7773e-02, PNorm = 69.1602, GNorm = 0.5384, lr_0 = 2.1915e-04
Loss = 4.6155e-02, PNorm = 69.1631, GNorm = 0.4660, lr_0 = 2.1900e-04
Loss = 4.1640e-02, PNorm = 69.1678, GNorm = 0.6130, lr_0 = 2.1885e-04
Loss = 4.4602e-02, PNorm = 69.1732, GNorm = 0.8488, lr_0 = 2.1870e-04
Loss = 4.1604e-02, PNorm = 69.1778, GNorm = 0.4615, lr_0 = 2.1855e-04
Loss = 5.2998e-02, PNorm = 69.1812, GNorm = 0.7043, lr_0 = 2.1840e-04
Loss = 4.5680e-02, PNorm = 69.1873, GNorm = 0.3687, lr_0 = 2.1825e-04
Loss = 4.4634e-02, PNorm = 69.1909, GNorm = 0.5117, lr_0 = 2.1810e-04
Loss = 5.3403e-02, PNorm = 69.1942, GNorm = 0.3989, lr_0 = 2.1795e-04
Loss = 5.9894e-02, PNorm = 69.1985, GNorm = 0.8311, lr_0 = 2.1780e-04
Loss = 4.6296e-02, PNorm = 69.2017, GNorm = 0.5699, lr_0 = 2.1765e-04
Loss = 4.9383e-02, PNorm = 69.2041, GNorm = 0.4761, lr_0 = 2.1751e-04
Loss = 4.8172e-02, PNorm = 69.2077, GNorm = 0.6132, lr_0 = 2.1736e-04
Loss = 5.0853e-02, PNorm = 69.2121, GNorm = 0.6468, lr_0 = 2.1721e-04
Loss = 4.0418e-02, PNorm = 69.2163, GNorm = 0.4012, lr_0 = 2.1706e-04
Loss = 4.9036e-02, PNorm = 69.2200, GNorm = 0.4975, lr_0 = 2.1691e-04
Loss = 5.4573e-02, PNorm = 69.2247, GNorm = 0.9786, lr_0 = 2.1676e-04
Loss = 5.2882e-02, PNorm = 69.2280, GNorm = 0.9188, lr_0 = 2.1661e-04
Loss = 5.4264e-02, PNorm = 69.2305, GNorm = 0.6027, lr_0 = 2.1646e-04
Loss = 4.8767e-02, PNorm = 69.2338, GNorm = 0.7046, lr_0 = 2.1632e-04
Loss = 4.8024e-02, PNorm = 69.2366, GNorm = 0.4281, lr_0 = 2.1617e-04
Loss = 5.0227e-02, PNorm = 69.2408, GNorm = 0.5772, lr_0 = 2.1602e-04
Loss = 4.8597e-02, PNorm = 69.2467, GNorm = 0.4251, lr_0 = 2.1587e-04
Loss = 5.4442e-02, PNorm = 69.2518, GNorm = 0.6587, lr_0 = 2.1572e-04
Loss = 6.4033e-02, PNorm = 69.2560, GNorm = 0.9031, lr_0 = 2.1558e-04
Loss = 5.5280e-02, PNorm = 69.2617, GNorm = 0.9148, lr_0 = 2.1543e-04
Loss = 5.3753e-02, PNorm = 69.2663, GNorm = 0.6104, lr_0 = 2.1528e-04
Loss = 5.0212e-02, PNorm = 69.2697, GNorm = 0.4605, lr_0 = 2.1513e-04
Loss = 5.1092e-02, PNorm = 69.2724, GNorm = 0.7843, lr_0 = 2.1499e-04
Loss = 5.5279e-02, PNorm = 69.2729, GNorm = 0.4663, lr_0 = 2.1484e-04
Loss = 4.3198e-02, PNorm = 69.2731, GNorm = 0.5765, lr_0 = 2.1469e-04
Loss = 4.6145e-02, PNorm = 69.2753, GNorm = 0.4798, lr_0 = 2.1454e-04
Loss = 4.6543e-02, PNorm = 69.2792, GNorm = 0.4878, lr_0 = 2.1440e-04
Loss = 4.8553e-02, PNorm = 69.2832, GNorm = 0.4498, lr_0 = 2.1425e-04
Loss = 5.4773e-02, PNorm = 69.2875, GNorm = 0.8230, lr_0 = 2.1410e-04
Loss = 4.1963e-02, PNorm = 69.2913, GNorm = 0.4310, lr_0 = 2.1396e-04
Loss = 4.5267e-02, PNorm = 69.2950, GNorm = 0.5001, lr_0 = 2.1381e-04
Loss = 4.9018e-02, PNorm = 69.2994, GNorm = 0.3975, lr_0 = 2.1366e-04
Loss = 4.0396e-02, PNorm = 69.3032, GNorm = 0.6085, lr_0 = 2.1352e-04
Loss = 5.0084e-02, PNorm = 69.3064, GNorm = 0.5364, lr_0 = 2.1337e-04
Loss = 4.1004e-02, PNorm = 69.3112, GNorm = 0.4493, lr_0 = 2.1323e-04
Loss = 4.9185e-02, PNorm = 69.3152, GNorm = 0.4481, lr_0 = 2.1308e-04
Loss = 4.4893e-02, PNorm = 69.3180, GNorm = 0.3332, lr_0 = 2.1293e-04
Loss = 4.8917e-02, PNorm = 69.3205, GNorm = 0.4900, lr_0 = 2.1279e-04
Loss = 5.0216e-02, PNorm = 69.3223, GNorm = 0.5682, lr_0 = 2.1264e-04
Loss = 4.7765e-02, PNorm = 69.3256, GNorm = 0.5508, lr_0 = 2.1250e-04
Loss = 4.8752e-02, PNorm = 69.3286, GNorm = 0.5387, lr_0 = 2.1235e-04
Loss = 4.5875e-02, PNorm = 69.3304, GNorm = 0.5464, lr_0 = 2.1221e-04
Loss = 5.4859e-02, PNorm = 69.3333, GNorm = 0.5068, lr_0 = 2.1206e-04
Loss = 4.8891e-02, PNorm = 69.3375, GNorm = 0.7014, lr_0 = 2.1191e-04
Loss = 4.4140e-02, PNorm = 69.3411, GNorm = 0.5573, lr_0 = 2.1177e-04
Loss = 4.9130e-02, PNorm = 69.3442, GNorm = 0.5320, lr_0 = 2.1162e-04
Loss = 5.3623e-02, PNorm = 69.3490, GNorm = 0.7870, lr_0 = 2.1148e-04
Loss = 3.8952e-02, PNorm = 69.3525, GNorm = 0.4151, lr_0 = 2.1133e-04
Loss = 5.3427e-02, PNorm = 69.3554, GNorm = 0.8324, lr_0 = 2.1119e-04
Loss = 4.7748e-02, PNorm = 69.3597, GNorm = 0.3922, lr_0 = 2.1104e-04
Loss = 4.6855e-02, PNorm = 69.3633, GNorm = 0.8068, lr_0 = 2.1090e-04
Loss = 4.7558e-02, PNorm = 69.3654, GNorm = 0.4893, lr_0 = 2.1076e-04
Loss = 4.3868e-02, PNorm = 69.3687, GNorm = 0.4782, lr_0 = 2.1061e-04
Loss = 4.5773e-02, PNorm = 69.3720, GNorm = 0.5082, lr_0 = 2.1047e-04
Loss = 4.9892e-02, PNorm = 69.3755, GNorm = 0.6799, lr_0 = 2.1032e-04
Loss = 4.4511e-02, PNorm = 69.3799, GNorm = 0.4864, lr_0 = 2.1018e-04
Loss = 4.2498e-02, PNorm = 69.3823, GNorm = 0.4541, lr_0 = 2.1003e-04
Loss = 5.5126e-02, PNorm = 69.3829, GNorm = 0.6247, lr_0 = 2.0989e-04
Loss = 5.0036e-02, PNorm = 69.3852, GNorm = 0.4870, lr_0 = 2.0975e-04
Loss = 5.8743e-02, PNorm = 69.3880, GNorm = 0.8144, lr_0 = 2.0960e-04
Validation mae = 0.382695
Epoch 21
Loss = 4.0799e-02, PNorm = 69.3917, GNorm = 0.5991, lr_0 = 2.0946e-04
Loss = 3.9495e-02, PNorm = 69.3949, GNorm = 0.4827, lr_0 = 2.0932e-04
Loss = 4.5680e-02, PNorm = 69.3994, GNorm = 0.5631, lr_0 = 2.0917e-04
Loss = 3.9137e-02, PNorm = 69.4037, GNorm = 0.4176, lr_0 = 2.0903e-04
Loss = 3.8985e-02, PNorm = 69.4078, GNorm = 0.4533, lr_0 = 2.0889e-04
Loss = 4.7934e-02, PNorm = 69.4119, GNorm = 0.3797, lr_0 = 2.0874e-04
Loss = 4.2602e-02, PNorm = 69.4159, GNorm = 0.4254, lr_0 = 2.0860e-04
Loss = 4.1774e-02, PNorm = 69.4191, GNorm = 0.5825, lr_0 = 2.0846e-04
Loss = 4.2871e-02, PNorm = 69.4225, GNorm = 0.4114, lr_0 = 2.0831e-04
Loss = 4.2169e-02, PNorm = 69.4257, GNorm = 0.7292, lr_0 = 2.0817e-04
Loss = 4.2770e-02, PNorm = 69.4291, GNorm = 0.6660, lr_0 = 2.0803e-04
Loss = 4.0842e-02, PNorm = 69.4331, GNorm = 0.5495, lr_0 = 2.0789e-04
Loss = 4.2612e-02, PNorm = 69.4379, GNorm = 0.6258, lr_0 = 2.0774e-04
Loss = 3.7290e-02, PNorm = 69.4425, GNorm = 0.4644, lr_0 = 2.0760e-04
Loss = 4.4490e-02, PNorm = 69.4460, GNorm = 0.4573, lr_0 = 2.0746e-04
Loss = 4.6531e-02, PNorm = 69.4508, GNorm = 0.4515, lr_0 = 2.0732e-04
Loss = 4.0820e-02, PNorm = 69.4538, GNorm = 0.4989, lr_0 = 2.0718e-04
Loss = 3.8184e-02, PNorm = 69.4568, GNorm = 0.5473, lr_0 = 2.0703e-04
Loss = 4.7486e-02, PNorm = 69.4599, GNorm = 0.5271, lr_0 = 2.0689e-04
Loss = 4.1582e-02, PNorm = 69.4626, GNorm = 0.6659, lr_0 = 2.0675e-04
Loss = 3.7301e-02, PNorm = 69.4649, GNorm = 0.4505, lr_0 = 2.0661e-04
Loss = 4.7908e-02, PNorm = 69.4666, GNorm = 0.6032, lr_0 = 2.0647e-04
Loss = 4.0229e-02, PNorm = 69.4695, GNorm = 0.4560, lr_0 = 2.0633e-04
Loss = 4.4788e-02, PNorm = 69.4729, GNorm = 0.8152, lr_0 = 2.0618e-04
Loss = 3.5665e-02, PNorm = 69.4774, GNorm = 0.5488, lr_0 = 2.0604e-04
Loss = 4.1631e-02, PNorm = 69.4817, GNorm = 0.6127, lr_0 = 2.0590e-04
Loss = 4.4916e-02, PNorm = 69.4870, GNorm = 0.4254, lr_0 = 2.0576e-04
Loss = 4.0477e-02, PNorm = 69.4927, GNorm = 0.3835, lr_0 = 2.0562e-04
Loss = 4.6095e-02, PNorm = 69.4965, GNorm = 0.6574, lr_0 = 2.0548e-04
Loss = 4.1274e-02, PNorm = 69.4983, GNorm = 0.3555, lr_0 = 2.0534e-04
Loss = 4.5496e-02, PNorm = 69.5006, GNorm = 0.4378, lr_0 = 2.0520e-04
Loss = 4.8194e-02, PNorm = 69.5032, GNorm = 0.5103, lr_0 = 2.0506e-04
Loss = 4.6547e-02, PNorm = 69.5076, GNorm = 0.5411, lr_0 = 2.0492e-04
Loss = 4.8999e-02, PNorm = 69.5128, GNorm = 0.7043, lr_0 = 2.0478e-04
Loss = 4.5121e-02, PNorm = 69.5169, GNorm = 0.6607, lr_0 = 2.0464e-04
Loss = 4.3394e-02, PNorm = 69.5212, GNorm = 0.7746, lr_0 = 2.0450e-04
Loss = 4.4140e-02, PNorm = 69.5257, GNorm = 0.5425, lr_0 = 2.0436e-04
Loss = 3.9741e-02, PNorm = 69.5313, GNorm = 0.5671, lr_0 = 2.0422e-04
Loss = 4.4588e-02, PNorm = 69.5346, GNorm = 0.4151, lr_0 = 2.0408e-04
Loss = 4.2193e-02, PNorm = 69.5379, GNorm = 0.3483, lr_0 = 2.0394e-04
Loss = 4.5452e-02, PNorm = 69.5413, GNorm = 0.5717, lr_0 = 2.0380e-04
Loss = 4.2206e-02, PNorm = 69.5432, GNorm = 0.8569, lr_0 = 2.0366e-04
Loss = 3.9293e-02, PNorm = 69.5451, GNorm = 0.4296, lr_0 = 2.0352e-04
Loss = 4.6162e-02, PNorm = 69.5478, GNorm = 0.4384, lr_0 = 2.0338e-04
Loss = 4.1026e-02, PNorm = 69.5499, GNorm = 0.4746, lr_0 = 2.0324e-04
Loss = 4.0242e-02, PNorm = 69.5528, GNorm = 0.4433, lr_0 = 2.0310e-04
Loss = 4.6300e-02, PNorm = 69.5547, GNorm = 0.4396, lr_0 = 2.0296e-04
Loss = 3.5298e-02, PNorm = 69.5589, GNorm = 0.4849, lr_0 = 2.0282e-04
Loss = 4.7596e-02, PNorm = 69.5633, GNorm = 0.8281, lr_0 = 2.0268e-04
Loss = 5.2551e-02, PNorm = 69.5677, GNorm = 0.6130, lr_0 = 2.0254e-04
Loss = 4.3936e-02, PNorm = 69.5701, GNorm = 0.4692, lr_0 = 2.0240e-04
Loss = 4.3709e-02, PNorm = 69.5744, GNorm = 0.4478, lr_0 = 2.0227e-04
Loss = 4.2983e-02, PNorm = 69.5778, GNorm = 0.5819, lr_0 = 2.0213e-04
Loss = 4.1110e-02, PNorm = 69.5799, GNorm = 0.4682, lr_0 = 2.0199e-04
Loss = 4.2744e-02, PNorm = 69.5816, GNorm = 0.4703, lr_0 = 2.0185e-04
Loss = 4.5450e-02, PNorm = 69.5844, GNorm = 0.5136, lr_0 = 2.0171e-04
Loss = 4.7174e-02, PNorm = 69.5877, GNorm = 0.3957, lr_0 = 2.0157e-04
Loss = 5.2090e-02, PNorm = 69.5926, GNorm = 0.5409, lr_0 = 2.0144e-04
Loss = 4.9539e-02, PNorm = 69.5964, GNorm = 0.3390, lr_0 = 2.0130e-04
Loss = 4.5071e-02, PNorm = 69.5986, GNorm = 0.6102, lr_0 = 2.0116e-04
Loss = 3.6504e-02, PNorm = 69.6001, GNorm = 0.6364, lr_0 = 2.0102e-04
Loss = 4.3394e-02, PNorm = 69.6028, GNorm = 0.3862, lr_0 = 2.0088e-04
Loss = 4.0067e-02, PNorm = 69.6077, GNorm = 0.3971, lr_0 = 2.0075e-04
Loss = 3.9487e-02, PNorm = 69.6119, GNorm = 0.9190, lr_0 = 2.0061e-04
Loss = 4.8716e-02, PNorm = 69.6148, GNorm = 0.7889, lr_0 = 2.0047e-04
Loss = 3.8373e-02, PNorm = 69.6165, GNorm = 0.5590, lr_0 = 2.0033e-04
Loss = 4.3782e-02, PNorm = 69.6176, GNorm = 0.4482, lr_0 = 2.0020e-04
Loss = 4.2135e-02, PNorm = 69.6205, GNorm = 0.8798, lr_0 = 2.0006e-04
Loss = 4.1044e-02, PNorm = 69.6231, GNorm = 0.5652, lr_0 = 1.9992e-04
Loss = 4.7189e-02, PNorm = 69.6270, GNorm = 0.4339, lr_0 = 1.9979e-04
Loss = 5.0299e-02, PNorm = 69.6323, GNorm = 0.7138, lr_0 = 1.9965e-04
Loss = 4.4961e-02, PNorm = 69.6372, GNorm = 0.7733, lr_0 = 1.9951e-04
Loss = 4.4791e-02, PNorm = 69.6423, GNorm = 0.5091, lr_0 = 1.9938e-04
Loss = 4.3704e-02, PNorm = 69.6468, GNorm = 0.4690, lr_0 = 1.9924e-04
Loss = 4.3122e-02, PNorm = 69.6501, GNorm = 0.5504, lr_0 = 1.9910e-04
Loss = 4.6531e-02, PNorm = 69.6519, GNorm = 0.7298, lr_0 = 1.9897e-04
Loss = 4.5143e-02, PNorm = 69.6532, GNorm = 0.5569, lr_0 = 1.9883e-04
Loss = 4.8377e-02, PNorm = 69.6539, GNorm = 0.4054, lr_0 = 1.9869e-04
Loss = 4.4345e-02, PNorm = 69.6586, GNorm = 0.4805, lr_0 = 1.9856e-04
Loss = 4.3059e-02, PNorm = 69.6615, GNorm = 0.5235, lr_0 = 1.9842e-04
Loss = 4.6445e-02, PNorm = 69.6640, GNorm = 0.5975, lr_0 = 1.9829e-04
Loss = 3.9579e-02, PNorm = 69.6657, GNorm = 0.3513, lr_0 = 1.9815e-04
Loss = 4.9829e-02, PNorm = 69.6680, GNorm = 0.4778, lr_0 = 1.9801e-04
Loss = 4.8091e-02, PNorm = 69.6705, GNorm = 0.6203, lr_0 = 1.9788e-04
Loss = 4.5222e-02, PNorm = 69.6744, GNorm = 0.5249, lr_0 = 1.9774e-04
Loss = 4.3910e-02, PNorm = 69.6789, GNorm = 0.5887, lr_0 = 1.9761e-04
Loss = 4.0655e-02, PNorm = 69.6835, GNorm = 0.4689, lr_0 = 1.9747e-04
Loss = 4.3487e-02, PNorm = 69.6872, GNorm = 0.3584, lr_0 = 1.9734e-04
Loss = 4.2416e-02, PNorm = 69.6886, GNorm = 0.3617, lr_0 = 1.9720e-04
Loss = 4.7609e-02, PNorm = 69.6906, GNorm = 0.5315, lr_0 = 1.9707e-04
Loss = 4.7685e-02, PNorm = 69.6924, GNorm = 0.6255, lr_0 = 1.9693e-04
Loss = 4.5283e-02, PNorm = 69.6942, GNorm = 0.5774, lr_0 = 1.9680e-04
Loss = 4.1451e-02, PNorm = 69.6953, GNorm = 0.5892, lr_0 = 1.9666e-04
Loss = 4.8689e-02, PNorm = 69.7000, GNorm = 0.4467, lr_0 = 1.9653e-04
Loss = 4.5955e-02, PNorm = 69.7048, GNorm = 0.5096, lr_0 = 1.9639e-04
Loss = 4.6089e-02, PNorm = 69.7092, GNorm = 0.4750, lr_0 = 1.9626e-04
Loss = 4.2984e-02, PNorm = 69.7121, GNorm = 0.4818, lr_0 = 1.9612e-04
Loss = 3.7535e-02, PNorm = 69.7155, GNorm = 0.5429, lr_0 = 1.9599e-04
Loss = 4.1037e-02, PNorm = 69.7178, GNorm = 0.6852, lr_0 = 1.9585e-04
Loss = 3.9478e-02, PNorm = 69.7210, GNorm = 0.4509, lr_0 = 1.9572e-04
Loss = 4.4645e-02, PNorm = 69.7246, GNorm = 0.3847, lr_0 = 1.9559e-04
Loss = 4.2576e-02, PNorm = 69.7284, GNorm = 0.4673, lr_0 = 1.9545e-04
Loss = 4.7366e-02, PNorm = 69.7296, GNorm = 0.4615, lr_0 = 1.9532e-04
Loss = 4.2555e-02, PNorm = 69.7314, GNorm = 0.6859, lr_0 = 1.9518e-04
Loss = 4.5721e-02, PNorm = 69.7328, GNorm = 0.5500, lr_0 = 1.9505e-04
Loss = 4.7558e-02, PNorm = 69.7358, GNorm = 0.5527, lr_0 = 1.9492e-04
Loss = 5.1558e-02, PNorm = 69.7399, GNorm = 0.3399, lr_0 = 1.9478e-04
Loss = 4.5873e-02, PNorm = 69.7431, GNorm = 0.4261, lr_0 = 1.9465e-04
Loss = 4.0270e-02, PNorm = 69.7487, GNorm = 0.4569, lr_0 = 1.9452e-04
Loss = 4.6939e-02, PNorm = 69.7541, GNorm = 0.5416, lr_0 = 1.9438e-04
Loss = 5.0408e-02, PNorm = 69.7574, GNorm = 0.4973, lr_0 = 1.9425e-04
Loss = 4.5109e-02, PNorm = 69.7590, GNorm = 0.3945, lr_0 = 1.9412e-04
Loss = 4.5988e-02, PNorm = 69.7615, GNorm = 0.9490, lr_0 = 1.9398e-04
Loss = 4.3179e-02, PNorm = 69.7649, GNorm = 0.4394, lr_0 = 1.9385e-04
Loss = 5.4508e-02, PNorm = 69.7666, GNorm = 1.1104, lr_0 = 1.9372e-04
Loss = 4.4498e-02, PNorm = 69.7689, GNorm = 0.7041, lr_0 = 1.9359e-04
Loss = 4.3215e-02, PNorm = 69.7717, GNorm = 0.7884, lr_0 = 1.9345e-04
Loss = 4.6270e-02, PNorm = 69.7748, GNorm = 0.3868, lr_0 = 1.9332e-04
Loss = 4.4786e-02, PNorm = 69.7783, GNorm = 0.3852, lr_0 = 1.9319e-04
Loss = 4.1154e-02, PNorm = 69.7817, GNorm = 0.4461, lr_0 = 1.9306e-04
Validation mae = 0.384312
Epoch 22
Loss = 3.4383e-02, PNorm = 69.7837, GNorm = 0.6255, lr_0 = 1.9292e-04
Loss = 3.0582e-02, PNorm = 69.7853, GNorm = 0.3327, lr_0 = 1.9279e-04
Loss = 3.9709e-02, PNorm = 69.7878, GNorm = 0.4760, lr_0 = 1.9266e-04
Loss = 4.4131e-02, PNorm = 69.7928, GNorm = 0.3886, lr_0 = 1.9253e-04
Loss = 4.2004e-02, PNorm = 69.7995, GNorm = 0.3615, lr_0 = 1.9240e-04
Loss = 3.7231e-02, PNorm = 69.8040, GNorm = 0.3913, lr_0 = 1.9226e-04
Loss = 3.6739e-02, PNorm = 69.8082, GNorm = 0.4002, lr_0 = 1.9213e-04
Loss = 3.6580e-02, PNorm = 69.8114, GNorm = 0.5591, lr_0 = 1.9200e-04
Loss = 3.9175e-02, PNorm = 69.8134, GNorm = 0.5544, lr_0 = 1.9187e-04
Loss = 3.6369e-02, PNorm = 69.8153, GNorm = 0.5087, lr_0 = 1.9174e-04
Loss = 3.5634e-02, PNorm = 69.8180, GNorm = 0.5865, lr_0 = 1.9161e-04
Loss = 4.6781e-02, PNorm = 69.8212, GNorm = 0.7346, lr_0 = 1.9148e-04
Loss = 3.9692e-02, PNorm = 69.8251, GNorm = 0.4717, lr_0 = 1.9134e-04
Loss = 3.8381e-02, PNorm = 69.8282, GNorm = 0.4521, lr_0 = 1.9121e-04
Loss = 3.1706e-02, PNorm = 69.8304, GNorm = 0.3717, lr_0 = 1.9108e-04
Loss = 4.4675e-02, PNorm = 69.8326, GNorm = 0.5656, lr_0 = 1.9095e-04
Loss = 4.3773e-02, PNorm = 69.8354, GNorm = 0.5791, lr_0 = 1.9082e-04
Loss = 3.9240e-02, PNorm = 69.8386, GNorm = 0.4919, lr_0 = 1.9069e-04
Loss = 4.4780e-02, PNorm = 69.8435, GNorm = 0.5651, lr_0 = 1.9056e-04
Loss = 4.0580e-02, PNorm = 69.8481, GNorm = 0.4648, lr_0 = 1.9043e-04
Loss = 3.6457e-02, PNorm = 69.8510, GNorm = 0.3427, lr_0 = 1.9030e-04
Loss = 3.4211e-02, PNorm = 69.8533, GNorm = 0.4043, lr_0 = 1.9017e-04
Loss = 3.7772e-02, PNorm = 69.8559, GNorm = 0.5275, lr_0 = 1.9004e-04
Loss = 4.0498e-02, PNorm = 69.8582, GNorm = 0.5980, lr_0 = 1.8991e-04
Loss = 3.8645e-02, PNorm = 69.8602, GNorm = 0.4436, lr_0 = 1.8978e-04
Loss = 3.7626e-02, PNorm = 69.8621, GNorm = 0.3858, lr_0 = 1.8965e-04
Loss = 4.2148e-02, PNorm = 69.8640, GNorm = 0.3919, lr_0 = 1.8952e-04
Loss = 4.2722e-02, PNorm = 69.8686, GNorm = 0.4291, lr_0 = 1.8939e-04
Loss = 3.4953e-02, PNorm = 69.8719, GNorm = 0.5320, lr_0 = 1.8926e-04
Loss = 4.3064e-02, PNorm = 69.8737, GNorm = 0.5365, lr_0 = 1.8913e-04
Loss = 3.5261e-02, PNorm = 69.8760, GNorm = 0.5053, lr_0 = 1.8900e-04
Loss = 3.5470e-02, PNorm = 69.8789, GNorm = 0.3677, lr_0 = 1.8887e-04
Loss = 4.0133e-02, PNorm = 69.8802, GNorm = 0.5296, lr_0 = 1.8874e-04
Loss = 4.4466e-02, PNorm = 69.8813, GNorm = 0.5742, lr_0 = 1.8861e-04
Loss = 3.9747e-02, PNorm = 69.8861, GNorm = 0.4380, lr_0 = 1.8848e-04
Loss = 3.7624e-02, PNorm = 69.8902, GNorm = 0.5168, lr_0 = 1.8835e-04
Loss = 4.1254e-02, PNorm = 69.8926, GNorm = 0.3858, lr_0 = 1.8822e-04
Loss = 4.2603e-02, PNorm = 69.8979, GNorm = 0.6423, lr_0 = 1.8809e-04
Loss = 4.7807e-02, PNorm = 69.9007, GNorm = 0.9538, lr_0 = 1.8797e-04
Loss = 4.1080e-02, PNorm = 69.9023, GNorm = 0.5489, lr_0 = 1.8784e-04
Loss = 3.5413e-02, PNorm = 69.9067, GNorm = 0.4616, lr_0 = 1.8771e-04
Loss = 4.4694e-02, PNorm = 69.9106, GNorm = 0.6359, lr_0 = 1.8758e-04
Loss = 3.9142e-02, PNorm = 69.9139, GNorm = 0.4256, lr_0 = 1.8745e-04
Loss = 3.4177e-02, PNorm = 69.9169, GNorm = 0.3873, lr_0 = 1.8732e-04
Loss = 3.9091e-02, PNorm = 69.9204, GNorm = 0.4523, lr_0 = 1.8719e-04
Loss = 4.4404e-02, PNorm = 69.9226, GNorm = 1.0774, lr_0 = 1.8707e-04
Loss = 4.2597e-02, PNorm = 69.9248, GNorm = 0.5807, lr_0 = 1.8694e-04
Loss = 3.7305e-02, PNorm = 69.9282, GNorm = 0.5655, lr_0 = 1.8681e-04
Loss = 4.5067e-02, PNorm = 69.9314, GNorm = 0.4635, lr_0 = 1.8668e-04
Loss = 4.4871e-02, PNorm = 69.9346, GNorm = 0.4927, lr_0 = 1.8655e-04
Loss = 4.1545e-02, PNorm = 69.9371, GNorm = 0.5657, lr_0 = 1.8643e-04
Loss = 4.8115e-02, PNorm = 69.9417, GNorm = 0.4685, lr_0 = 1.8630e-04
Loss = 4.3953e-02, PNorm = 69.9462, GNorm = 0.4293, lr_0 = 1.8617e-04
Loss = 4.0379e-02, PNorm = 69.9515, GNorm = 0.5878, lr_0 = 1.8604e-04
Loss = 3.6130e-02, PNorm = 69.9554, GNorm = 0.4486, lr_0 = 1.8592e-04
Loss = 4.3119e-02, PNorm = 69.9587, GNorm = 0.6773, lr_0 = 1.8579e-04
Loss = 5.1134e-02, PNorm = 69.9609, GNorm = 0.4822, lr_0 = 1.8566e-04
Loss = 4.6987e-02, PNorm = 69.9606, GNorm = 0.5526, lr_0 = 1.8553e-04
Loss = 4.3161e-02, PNorm = 69.9624, GNorm = 0.4762, lr_0 = 1.8541e-04
Loss = 3.7851e-02, PNorm = 69.9655, GNorm = 0.4966, lr_0 = 1.8528e-04
Loss = 3.9734e-02, PNorm = 69.9687, GNorm = 0.4159, lr_0 = 1.8515e-04
Loss = 3.6490e-02, PNorm = 69.9721, GNorm = 0.4385, lr_0 = 1.8503e-04
Loss = 4.9611e-02, PNorm = 69.9745, GNorm = 0.6492, lr_0 = 1.8490e-04
Loss = 3.8325e-02, PNorm = 69.9758, GNorm = 0.5331, lr_0 = 1.8477e-04
Loss = 4.2190e-02, PNorm = 69.9776, GNorm = 0.5456, lr_0 = 1.8465e-04
Loss = 3.6659e-02, PNorm = 69.9804, GNorm = 0.4915, lr_0 = 1.8452e-04
Loss = 4.0683e-02, PNorm = 69.9841, GNorm = 0.4731, lr_0 = 1.8439e-04
Loss = 4.5309e-02, PNorm = 69.9883, GNorm = 0.5289, lr_0 = 1.8427e-04
Loss = 4.6868e-02, PNorm = 69.9919, GNorm = 0.7795, lr_0 = 1.8414e-04
Loss = 4.2004e-02, PNorm = 69.9943, GNorm = 0.5291, lr_0 = 1.8401e-04
Loss = 4.1317e-02, PNorm = 69.9974, GNorm = 0.4448, lr_0 = 1.8389e-04
Loss = 4.1481e-02, PNorm = 70.0003, GNorm = 0.4959, lr_0 = 1.8376e-04
Loss = 4.3243e-02, PNorm = 70.0030, GNorm = 0.5847, lr_0 = 1.8364e-04
Loss = 4.4179e-02, PNorm = 70.0065, GNorm = 0.5849, lr_0 = 1.8351e-04
Loss = 4.2659e-02, PNorm = 70.0109, GNorm = 0.4877, lr_0 = 1.8338e-04
Loss = 4.5473e-02, PNorm = 70.0133, GNorm = 0.4708, lr_0 = 1.8326e-04
Loss = 4.2840e-02, PNorm = 70.0151, GNorm = 0.4464, lr_0 = 1.8313e-04
Loss = 3.9779e-02, PNorm = 70.0192, GNorm = 0.5784, lr_0 = 1.8301e-04
Loss = 4.9287e-02, PNorm = 70.0204, GNorm = 0.4400, lr_0 = 1.8288e-04
Loss = 4.9302e-02, PNorm = 70.0224, GNorm = 0.6694, lr_0 = 1.8276e-04
Loss = 4.1732e-02, PNorm = 70.0259, GNorm = 0.6067, lr_0 = 1.8263e-04
Loss = 3.6998e-02, PNorm = 70.0288, GNorm = 0.5335, lr_0 = 1.8251e-04
Loss = 4.5389e-02, PNorm = 70.0321, GNorm = 0.5409, lr_0 = 1.8238e-04
Loss = 4.7491e-02, PNorm = 70.0369, GNorm = 0.7298, lr_0 = 1.8226e-04
Loss = 4.0235e-02, PNorm = 70.0396, GNorm = 0.6195, lr_0 = 1.8213e-04
Loss = 4.7200e-02, PNorm = 70.0419, GNorm = 0.5699, lr_0 = 1.8201e-04
Loss = 3.8619e-02, PNorm = 70.0451, GNorm = 0.4764, lr_0 = 1.8188e-04
Loss = 5.1859e-02, PNorm = 70.0474, GNorm = 0.3885, lr_0 = 1.8176e-04
Loss = 5.5138e-02, PNorm = 70.0500, GNorm = 0.5436, lr_0 = 1.8163e-04
Loss = 4.9149e-02, PNorm = 70.0522, GNorm = 0.6297, lr_0 = 1.8151e-04
Loss = 4.3602e-02, PNorm = 70.0528, GNorm = 0.5734, lr_0 = 1.8138e-04
Loss = 5.0037e-02, PNorm = 70.0548, GNorm = 0.9482, lr_0 = 1.8126e-04
Loss = 4.0058e-02, PNorm = 70.0578, GNorm = 0.4059, lr_0 = 1.8114e-04
Loss = 4.3498e-02, PNorm = 70.0621, GNorm = 0.4435, lr_0 = 1.8101e-04
Loss = 4.6539e-02, PNorm = 70.0641, GNorm = 0.6061, lr_0 = 1.8089e-04
Loss = 4.5297e-02, PNorm = 70.0678, GNorm = 0.4273, lr_0 = 1.8076e-04
Loss = 4.0578e-02, PNorm = 70.0713, GNorm = 0.4539, lr_0 = 1.8064e-04
Loss = 3.6804e-02, PNorm = 70.0753, GNorm = 0.4897, lr_0 = 1.8052e-04
Loss = 4.0410e-02, PNorm = 70.0762, GNorm = 0.4491, lr_0 = 1.8039e-04
Loss = 4.2747e-02, PNorm = 70.0772, GNorm = 0.5588, lr_0 = 1.8027e-04
Loss = 3.9797e-02, PNorm = 70.0785, GNorm = 0.4866, lr_0 = 1.8015e-04
Loss = 3.9992e-02, PNorm = 70.0810, GNorm = 0.4203, lr_0 = 1.8002e-04
Loss = 4.2047e-02, PNorm = 70.0834, GNorm = 0.4059, lr_0 = 1.7990e-04
Loss = 4.0536e-02, PNorm = 70.0858, GNorm = 0.5647, lr_0 = 1.7978e-04
Loss = 4.8230e-02, PNorm = 70.0892, GNorm = 0.4573, lr_0 = 1.7965e-04
Loss = 4.1200e-02, PNorm = 70.0924, GNorm = 0.7231, lr_0 = 1.7953e-04
Loss = 4.4540e-02, PNorm = 70.0938, GNorm = 0.3744, lr_0 = 1.7941e-04
Loss = 3.9923e-02, PNorm = 70.0964, GNorm = 0.5298, lr_0 = 1.7928e-04
Loss = 4.4444e-02, PNorm = 70.0999, GNorm = 0.5543, lr_0 = 1.7916e-04
Loss = 4.0320e-02, PNorm = 70.1022, GNorm = 0.4909, lr_0 = 1.7904e-04
Loss = 4.0294e-02, PNorm = 70.1036, GNorm = 0.4701, lr_0 = 1.7892e-04
Loss = 4.4481e-02, PNorm = 70.1071, GNorm = 0.4548, lr_0 = 1.7879e-04
Loss = 4.7983e-02, PNorm = 70.1108, GNorm = 0.5357, lr_0 = 1.7867e-04
Loss = 4.2364e-02, PNorm = 70.1124, GNorm = 0.5391, lr_0 = 1.7855e-04
Loss = 2.9976e-02, PNorm = 70.1145, GNorm = 0.5590, lr_0 = 1.7843e-04
Loss = 4.4955e-02, PNorm = 70.1189, GNorm = 0.6805, lr_0 = 1.7830e-04
Loss = 3.4444e-02, PNorm = 70.1225, GNorm = 0.5191, lr_0 = 1.7818e-04
Loss = 4.3966e-02, PNorm = 70.1261, GNorm = 0.5180, lr_0 = 1.7806e-04
Loss = 4.8957e-02, PNorm = 70.1296, GNorm = 0.4860, lr_0 = 1.7794e-04
Loss = 4.7933e-02, PNorm = 70.1322, GNorm = 0.9446, lr_0 = 1.7782e-04
Validation mae = 0.391943
Epoch 23
Loss = 3.8796e-02, PNorm = 70.1344, GNorm = 0.3366, lr_0 = 1.7769e-04
Loss = 3.4732e-02, PNorm = 70.1364, GNorm = 0.4323, lr_0 = 1.7757e-04
Loss = 3.6150e-02, PNorm = 70.1391, GNorm = 0.7693, lr_0 = 1.7745e-04
Loss = 3.5101e-02, PNorm = 70.1420, GNorm = 0.5669, lr_0 = 1.7733e-04
Loss = 3.5335e-02, PNorm = 70.1453, GNorm = 0.4518, lr_0 = 1.7721e-04
Loss = 3.8718e-02, PNorm = 70.1481, GNorm = 0.4138, lr_0 = 1.7709e-04
Loss = 3.6222e-02, PNorm = 70.1505, GNorm = 0.4789, lr_0 = 1.7696e-04
Loss = 3.6335e-02, PNorm = 70.1540, GNorm = 0.6738, lr_0 = 1.7684e-04
Loss = 3.3005e-02, PNorm = 70.1573, GNorm = 0.5188, lr_0 = 1.7672e-04
Loss = 3.4883e-02, PNorm = 70.1605, GNorm = 0.4436, lr_0 = 1.7660e-04
Loss = 3.5537e-02, PNorm = 70.1625, GNorm = 0.4299, lr_0 = 1.7648e-04
Loss = 4.5450e-02, PNorm = 70.1651, GNorm = 0.6159, lr_0 = 1.7636e-04
Loss = 4.1953e-02, PNorm = 70.1679, GNorm = 0.5264, lr_0 = 1.7624e-04
Loss = 4.0461e-02, PNorm = 70.1703, GNorm = 0.4838, lr_0 = 1.7612e-04
Loss = 3.4010e-02, PNorm = 70.1745, GNorm = 0.4209, lr_0 = 1.7600e-04
Loss = 3.6860e-02, PNorm = 70.1772, GNorm = 0.3794, lr_0 = 1.7588e-04
Loss = 4.0853e-02, PNorm = 70.1800, GNorm = 0.5117, lr_0 = 1.7576e-04
Loss = 4.0817e-02, PNorm = 70.1829, GNorm = 0.4093, lr_0 = 1.7564e-04
Loss = 3.8335e-02, PNorm = 70.1864, GNorm = 0.5246, lr_0 = 1.7552e-04
Loss = 3.7665e-02, PNorm = 70.1896, GNorm = 0.4875, lr_0 = 1.7540e-04
Loss = 3.9198e-02, PNorm = 70.1921, GNorm = 0.5586, lr_0 = 1.7528e-04
Loss = 3.4279e-02, PNorm = 70.1947, GNorm = 0.7490, lr_0 = 1.7516e-04
Loss = 3.4720e-02, PNorm = 70.1976, GNorm = 0.5981, lr_0 = 1.7504e-04
Loss = 3.9281e-02, PNorm = 70.2008, GNorm = 0.7176, lr_0 = 1.7492e-04
Loss = 3.9163e-02, PNorm = 70.2037, GNorm = 0.7188, lr_0 = 1.7480e-04
Loss = 3.6923e-02, PNorm = 70.2066, GNorm = 0.3798, lr_0 = 1.7468e-04
Loss = 3.4958e-02, PNorm = 70.2095, GNorm = 0.4946, lr_0 = 1.7456e-04
Loss = 4.4546e-02, PNorm = 70.2126, GNorm = 0.4544, lr_0 = 1.7444e-04
Loss = 4.0073e-02, PNorm = 70.2150, GNorm = 0.4071, lr_0 = 1.7432e-04
Loss = 3.7795e-02, PNorm = 70.2156, GNorm = 0.5303, lr_0 = 1.7420e-04
Loss = 4.3347e-02, PNorm = 70.2192, GNorm = 0.6088, lr_0 = 1.7408e-04
Loss = 3.8885e-02, PNorm = 70.2229, GNorm = 0.5330, lr_0 = 1.7396e-04
Loss = 3.8200e-02, PNorm = 70.2241, GNorm = 0.4579, lr_0 = 1.7384e-04
Loss = 3.4061e-02, PNorm = 70.2250, GNorm = 0.4661, lr_0 = 1.7372e-04
Loss = 3.1233e-02, PNorm = 70.2276, GNorm = 0.4128, lr_0 = 1.7360e-04
Loss = 3.7906e-02, PNorm = 70.2313, GNorm = 0.5459, lr_0 = 1.7348e-04
Loss = 4.0676e-02, PNorm = 70.2349, GNorm = 0.4699, lr_0 = 1.7336e-04
Loss = 3.9256e-02, PNorm = 70.2344, GNorm = 0.4043, lr_0 = 1.7325e-04
Loss = 4.2036e-02, PNorm = 70.2356, GNorm = 0.5061, lr_0 = 1.7313e-04
Loss = 3.1821e-02, PNorm = 70.2374, GNorm = 0.4564, lr_0 = 1.7301e-04
Loss = 3.6984e-02, PNorm = 70.2391, GNorm = 0.6511, lr_0 = 1.7289e-04
Loss = 4.3061e-02, PNorm = 70.2428, GNorm = 0.5601, lr_0 = 1.7277e-04
Loss = 3.7127e-02, PNorm = 70.2443, GNorm = 0.5270, lr_0 = 1.7265e-04
Loss = 3.7535e-02, PNorm = 70.2479, GNorm = 0.4348, lr_0 = 1.7253e-04
Loss = 4.2858e-02, PNorm = 70.2511, GNorm = 0.5853, lr_0 = 1.7242e-04
Loss = 3.8274e-02, PNorm = 70.2538, GNorm = 0.4826, lr_0 = 1.7230e-04
Loss = 4.5397e-02, PNorm = 70.2551, GNorm = 0.5713, lr_0 = 1.7218e-04
Loss = 3.3156e-02, PNorm = 70.2570, GNorm = 0.5506, lr_0 = 1.7206e-04
Loss = 4.1406e-02, PNorm = 70.2590, GNorm = 0.6467, lr_0 = 1.7194e-04
Loss = 4.1715e-02, PNorm = 70.2615, GNorm = 0.4329, lr_0 = 1.7183e-04
Loss = 3.5907e-02, PNorm = 70.2657, GNorm = 0.6748, lr_0 = 1.7171e-04
Loss = 4.0636e-02, PNorm = 70.2697, GNorm = 0.4398, lr_0 = 1.7159e-04
Loss = 4.4561e-02, PNorm = 70.2715, GNorm = 0.9205, lr_0 = 1.7147e-04
Loss = 3.9658e-02, PNorm = 70.2742, GNorm = 0.4578, lr_0 = 1.7136e-04
Loss = 3.7077e-02, PNorm = 70.2762, GNorm = 0.4519, lr_0 = 1.7124e-04
Loss = 4.1244e-02, PNorm = 70.2778, GNorm = 0.5159, lr_0 = 1.7112e-04
Loss = 4.1994e-02, PNorm = 70.2809, GNorm = 0.8638, lr_0 = 1.7100e-04
Loss = 3.6583e-02, PNorm = 70.2826, GNorm = 0.3628, lr_0 = 1.7089e-04
Loss = 4.1716e-02, PNorm = 70.2844, GNorm = 0.5112, lr_0 = 1.7077e-04
Loss = 3.8835e-02, PNorm = 70.2870, GNorm = 0.6644, lr_0 = 1.7065e-04
Loss = 4.1067e-02, PNorm = 70.2898, GNorm = 0.4969, lr_0 = 1.7054e-04
Loss = 3.4854e-02, PNorm = 70.2917, GNorm = 0.4696, lr_0 = 1.7042e-04
Loss = 3.7391e-02, PNorm = 70.2937, GNorm = 0.5876, lr_0 = 1.7030e-04
Loss = 3.6709e-02, PNorm = 70.2969, GNorm = 0.6375, lr_0 = 1.7019e-04
Loss = 3.5366e-02, PNorm = 70.3009, GNorm = 0.3862, lr_0 = 1.7007e-04
Loss = 3.6953e-02, PNorm = 70.3032, GNorm = 0.5286, lr_0 = 1.6995e-04
Loss = 3.8850e-02, PNorm = 70.3036, GNorm = 0.5102, lr_0 = 1.6984e-04
Loss = 3.8277e-02, PNorm = 70.3043, GNorm = 0.4539, lr_0 = 1.6972e-04
Loss = 3.9637e-02, PNorm = 70.3057, GNorm = 0.7371, lr_0 = 1.6960e-04
Loss = 4.1902e-02, PNorm = 70.3081, GNorm = 0.4474, lr_0 = 1.6949e-04
Loss = 3.6409e-02, PNorm = 70.3102, GNorm = 0.4500, lr_0 = 1.6937e-04
Loss = 4.2234e-02, PNorm = 70.3132, GNorm = 0.5547, lr_0 = 1.6926e-04
Loss = 4.0588e-02, PNorm = 70.3166, GNorm = 0.5356, lr_0 = 1.6914e-04
Loss = 4.2443e-02, PNorm = 70.3202, GNorm = 0.7353, lr_0 = 1.6902e-04
Loss = 3.7569e-02, PNorm = 70.3222, GNorm = 0.8082, lr_0 = 1.6891e-04
Loss = 3.9179e-02, PNorm = 70.3232, GNorm = 0.4976, lr_0 = 1.6879e-04
Loss = 3.6797e-02, PNorm = 70.3245, GNorm = 0.5337, lr_0 = 1.6868e-04
Loss = 4.0887e-02, PNorm = 70.3275, GNorm = 0.5432, lr_0 = 1.6856e-04
Loss = 3.7874e-02, PNorm = 70.3306, GNorm = 0.7188, lr_0 = 1.6845e-04
Loss = 4.1216e-02, PNorm = 70.3345, GNorm = 0.4415, lr_0 = 1.6833e-04
Loss = 3.9264e-02, PNorm = 70.3370, GNorm = 0.7745, lr_0 = 1.6821e-04
Loss = 4.4461e-02, PNorm = 70.3387, GNorm = 0.4933, lr_0 = 1.6810e-04
Loss = 3.9621e-02, PNorm = 70.3430, GNorm = 0.5827, lr_0 = 1.6798e-04
Loss = 3.8665e-02, PNorm = 70.3465, GNorm = 0.4112, lr_0 = 1.6787e-04
Loss = 4.2897e-02, PNorm = 70.3473, GNorm = 0.4980, lr_0 = 1.6775e-04
Loss = 4.0775e-02, PNorm = 70.3476, GNorm = 0.3510, lr_0 = 1.6764e-04
Loss = 3.5999e-02, PNorm = 70.3488, GNorm = 0.3338, lr_0 = 1.6752e-04
Loss = 3.5833e-02, PNorm = 70.3512, GNorm = 0.3545, lr_0 = 1.6741e-04
Loss = 3.5112e-02, PNorm = 70.3535, GNorm = 0.4022, lr_0 = 1.6729e-04
Loss = 4.1521e-02, PNorm = 70.3561, GNorm = 0.3721, lr_0 = 1.6718e-04
Loss = 3.6667e-02, PNorm = 70.3584, GNorm = 0.4368, lr_0 = 1.6707e-04
Loss = 3.8263e-02, PNorm = 70.3598, GNorm = 0.6988, lr_0 = 1.6695e-04
Loss = 3.7998e-02, PNorm = 70.3637, GNorm = 0.4072, lr_0 = 1.6684e-04
Loss = 3.4574e-02, PNorm = 70.3681, GNorm = 0.4478, lr_0 = 1.6672e-04
Loss = 4.5773e-02, PNorm = 70.3707, GNorm = 0.4472, lr_0 = 1.6661e-04
Loss = 4.6667e-02, PNorm = 70.3738, GNorm = 0.6205, lr_0 = 1.6649e-04
Loss = 4.0173e-02, PNorm = 70.3761, GNorm = 0.6640, lr_0 = 1.6638e-04
Loss = 4.2734e-02, PNorm = 70.3780, GNorm = 0.4126, lr_0 = 1.6627e-04
Loss = 3.9172e-02, PNorm = 70.3803, GNorm = 0.4906, lr_0 = 1.6615e-04
Loss = 4.0045e-02, PNorm = 70.3839, GNorm = 0.5447, lr_0 = 1.6604e-04
Loss = 3.9594e-02, PNorm = 70.3862, GNorm = 0.4413, lr_0 = 1.6592e-04
Loss = 4.0919e-02, PNorm = 70.3863, GNorm = 0.5908, lr_0 = 1.6581e-04
Loss = 4.2640e-02, PNorm = 70.3869, GNorm = 0.5007, lr_0 = 1.6570e-04
Loss = 3.7914e-02, PNorm = 70.3888, GNorm = 0.5433, lr_0 = 1.6558e-04
Loss = 4.3552e-02, PNorm = 70.3911, GNorm = 0.5705, lr_0 = 1.6547e-04
Loss = 3.8966e-02, PNorm = 70.3947, GNorm = 0.6338, lr_0 = 1.6536e-04
Loss = 3.8499e-02, PNorm = 70.3989, GNorm = 0.4270, lr_0 = 1.6524e-04
Loss = 4.5715e-02, PNorm = 70.4029, GNorm = 0.4296, lr_0 = 1.6513e-04
Loss = 4.0180e-02, PNorm = 70.4047, GNorm = 0.5858, lr_0 = 1.6502e-04
Loss = 5.3820e-02, PNorm = 70.4070, GNorm = 0.3830, lr_0 = 1.6490e-04
Loss = 5.6759e-02, PNorm = 70.4120, GNorm = 0.7183, lr_0 = 1.6479e-04
Loss = 4.6924e-02, PNorm = 70.4167, GNorm = 0.9748, lr_0 = 1.6468e-04
Loss = 4.5501e-02, PNorm = 70.4199, GNorm = 0.3863, lr_0 = 1.6457e-04
Loss = 4.0244e-02, PNorm = 70.4221, GNorm = 0.7932, lr_0 = 1.6445e-04
Loss = 3.9576e-02, PNorm = 70.4247, GNorm = 0.4458, lr_0 = 1.6434e-04
Loss = 4.4445e-02, PNorm = 70.4281, GNorm = 0.5081, lr_0 = 1.6423e-04
Loss = 4.9932e-02, PNorm = 70.4312, GNorm = 0.5063, lr_0 = 1.6412e-04
Loss = 3.9468e-02, PNorm = 70.4349, GNorm = 0.5670, lr_0 = 1.6400e-04
Loss = 4.6925e-02, PNorm = 70.4389, GNorm = 0.5881, lr_0 = 1.6389e-04
Loss = 3.9057e-02, PNorm = 70.4405, GNorm = 0.5564, lr_0 = 1.6378e-04
Validation mae = 0.390229
Epoch 24
Loss = 3.6037e-02, PNorm = 70.4425, GNorm = 0.7051, lr_0 = 1.6367e-04
Loss = 4.0184e-02, PNorm = 70.4455, GNorm = 0.4729, lr_0 = 1.6355e-04
Loss = 3.4017e-02, PNorm = 70.4481, GNorm = 0.3556, lr_0 = 1.6344e-04
Loss = 4.0282e-02, PNorm = 70.4496, GNorm = 0.5406, lr_0 = 1.6333e-04
Loss = 3.8517e-02, PNorm = 70.4525, GNorm = 0.4768, lr_0 = 1.6322e-04
Loss = 3.3133e-02, PNorm = 70.4555, GNorm = 0.2966, lr_0 = 1.6311e-04
Loss = 3.2879e-02, PNorm = 70.4569, GNorm = 0.3271, lr_0 = 1.6299e-04
Loss = 3.6444e-02, PNorm = 70.4590, GNorm = 0.4355, lr_0 = 1.6288e-04
Loss = 3.3794e-02, PNorm = 70.4625, GNorm = 0.5311, lr_0 = 1.6277e-04
Loss = 3.2613e-02, PNorm = 70.4656, GNorm = 0.3896, lr_0 = 1.6266e-04
Loss = 3.3983e-02, PNorm = 70.4686, GNorm = 0.4057, lr_0 = 1.6255e-04
Loss = 2.9910e-02, PNorm = 70.4705, GNorm = 0.3231, lr_0 = 1.6244e-04
Loss = 3.1864e-02, PNorm = 70.4720, GNorm = 0.6031, lr_0 = 1.6233e-04
Loss = 3.9072e-02, PNorm = 70.4736, GNorm = 0.4097, lr_0 = 1.6221e-04
Loss = 3.8608e-02, PNorm = 70.4742, GNorm = 0.3389, lr_0 = 1.6210e-04
Loss = 3.2513e-02, PNorm = 70.4764, GNorm = 0.3416, lr_0 = 1.6199e-04
Loss = 3.4175e-02, PNorm = 70.4788, GNorm = 0.5603, lr_0 = 1.6188e-04
Loss = 3.6051e-02, PNorm = 70.4813, GNorm = 0.5635, lr_0 = 1.6177e-04
Loss = 4.1426e-02, PNorm = 70.4831, GNorm = 0.5654, lr_0 = 1.6166e-04
Loss = 3.5716e-02, PNorm = 70.4851, GNorm = 0.3704, lr_0 = 1.6155e-04
Loss = 3.5586e-02, PNorm = 70.4880, GNorm = 0.4619, lr_0 = 1.6144e-04
Loss = 4.0816e-02, PNorm = 70.4909, GNorm = 0.5642, lr_0 = 1.6133e-04
Loss = 3.2998e-02, PNorm = 70.4923, GNorm = 0.6990, lr_0 = 1.6122e-04
Loss = 3.7991e-02, PNorm = 70.4949, GNorm = 0.5490, lr_0 = 1.6111e-04
Loss = 3.3023e-02, PNorm = 70.4987, GNorm = 0.4043, lr_0 = 1.6100e-04
Loss = 3.9246e-02, PNorm = 70.5030, GNorm = 0.4752, lr_0 = 1.6089e-04
Loss = 3.2520e-02, PNorm = 70.5066, GNorm = 0.5330, lr_0 = 1.6078e-04
Loss = 3.6395e-02, PNorm = 70.5089, GNorm = 0.5919, lr_0 = 1.6067e-04
Loss = 3.1874e-02, PNorm = 70.5114, GNorm = 0.4091, lr_0 = 1.6056e-04
Loss = 3.8456e-02, PNorm = 70.5137, GNorm = 0.3933, lr_0 = 1.6045e-04
Loss = 3.8412e-02, PNorm = 70.5155, GNorm = 0.6221, lr_0 = 1.6034e-04
Loss = 3.2286e-02, PNorm = 70.5174, GNorm = 0.4261, lr_0 = 1.6023e-04
Loss = 3.6111e-02, PNorm = 70.5200, GNorm = 0.6249, lr_0 = 1.6012e-04
Loss = 3.7465e-02, PNorm = 70.5223, GNorm = 0.5674, lr_0 = 1.6001e-04
Loss = 3.8280e-02, PNorm = 70.5249, GNorm = 0.5004, lr_0 = 1.5990e-04
Loss = 3.7915e-02, PNorm = 70.5270, GNorm = 0.3757, lr_0 = 1.5979e-04
Loss = 3.5307e-02, PNorm = 70.5296, GNorm = 0.4175, lr_0 = 1.5968e-04
Loss = 3.3808e-02, PNorm = 70.5328, GNorm = 0.4950, lr_0 = 1.5957e-04
Loss = 3.5705e-02, PNorm = 70.5365, GNorm = 0.5405, lr_0 = 1.5946e-04
Loss = 3.9109e-02, PNorm = 70.5390, GNorm = 0.5093, lr_0 = 1.5935e-04
Loss = 3.7257e-02, PNorm = 70.5414, GNorm = 0.5718, lr_0 = 1.5924e-04
Loss = 3.5486e-02, PNorm = 70.5440, GNorm = 0.4681, lr_0 = 1.5913e-04
Loss = 3.3371e-02, PNorm = 70.5458, GNorm = 0.4042, lr_0 = 1.5902e-04
Loss = 3.3674e-02, PNorm = 70.5471, GNorm = 0.4226, lr_0 = 1.5891e-04
Loss = 3.8838e-02, PNorm = 70.5494, GNorm = 0.4447, lr_0 = 1.5880e-04
Loss = 3.0446e-02, PNorm = 70.5523, GNorm = 0.4359, lr_0 = 1.5870e-04
Loss = 3.7087e-02, PNorm = 70.5550, GNorm = 0.4921, lr_0 = 1.5859e-04
Loss = 3.9705e-02, PNorm = 70.5574, GNorm = 0.4315, lr_0 = 1.5848e-04
Loss = 4.0676e-02, PNorm = 70.5607, GNorm = 0.4234, lr_0 = 1.5837e-04
Loss = 3.1121e-02, PNorm = 70.5616, GNorm = 0.4542, lr_0 = 1.5826e-04
Loss = 4.1512e-02, PNorm = 70.5621, GNorm = 0.5413, lr_0 = 1.5815e-04
Loss = 3.4098e-02, PNorm = 70.5641, GNorm = 0.4926, lr_0 = 1.5804e-04
Loss = 3.2074e-02, PNorm = 70.5662, GNorm = 0.5201, lr_0 = 1.5794e-04
Loss = 4.0444e-02, PNorm = 70.5684, GNorm = 0.3852, lr_0 = 1.5783e-04
Loss = 4.0311e-02, PNorm = 70.5713, GNorm = 0.6927, lr_0 = 1.5772e-04
Loss = 3.6690e-02, PNorm = 70.5743, GNorm = 0.4423, lr_0 = 1.5761e-04
Loss = 3.9768e-02, PNorm = 70.5765, GNorm = 0.6639, lr_0 = 1.5750e-04
Loss = 4.0389e-02, PNorm = 70.5786, GNorm = 0.3641, lr_0 = 1.5740e-04
Loss = 4.0120e-02, PNorm = 70.5817, GNorm = 0.4681, lr_0 = 1.5729e-04
Loss = 3.7535e-02, PNorm = 70.5850, GNorm = 0.4882, lr_0 = 1.5718e-04
Loss = 4.0549e-02, PNorm = 70.5874, GNorm = 0.6191, lr_0 = 1.5707e-04
Loss = 3.2558e-02, PNorm = 70.5888, GNorm = 0.4951, lr_0 = 1.5697e-04
Loss = 3.7736e-02, PNorm = 70.5910, GNorm = 0.7397, lr_0 = 1.5686e-04
Loss = 3.0994e-02, PNorm = 70.5933, GNorm = 0.3506, lr_0 = 1.5675e-04
Loss = 3.6021e-02, PNorm = 70.5955, GNorm = 0.4226, lr_0 = 1.5664e-04
Loss = 3.2917e-02, PNorm = 70.5976, GNorm = 0.3498, lr_0 = 1.5654e-04
Loss = 4.0901e-02, PNorm = 70.5999, GNorm = 0.8967, lr_0 = 1.5643e-04
Loss = 3.6646e-02, PNorm = 70.6020, GNorm = 0.3907, lr_0 = 1.5632e-04
Loss = 3.4868e-02, PNorm = 70.6037, GNorm = 0.4963, lr_0 = 1.5621e-04
Loss = 3.5144e-02, PNorm = 70.6057, GNorm = 0.5327, lr_0 = 1.5611e-04
Loss = 3.6704e-02, PNorm = 70.6080, GNorm = 0.3125, lr_0 = 1.5600e-04
Loss = 4.5975e-02, PNorm = 70.6095, GNorm = 0.6262, lr_0 = 1.5589e-04
Loss = 3.7289e-02, PNorm = 70.6123, GNorm = 0.8312, lr_0 = 1.5579e-04
Loss = 3.9762e-02, PNorm = 70.6146, GNorm = 0.4647, lr_0 = 1.5568e-04
Loss = 3.4207e-02, PNorm = 70.6155, GNorm = 0.9745, lr_0 = 1.5557e-04
Loss = 3.9881e-02, PNorm = 70.6176, GNorm = 0.8687, lr_0 = 1.5547e-04
Loss = 4.6428e-02, PNorm = 70.6206, GNorm = 0.6797, lr_0 = 1.5536e-04
Loss = 4.0617e-02, PNorm = 70.6226, GNorm = 0.4242, lr_0 = 1.5525e-04
Loss = 3.8746e-02, PNorm = 70.6247, GNorm = 0.4164, lr_0 = 1.5515e-04
Loss = 4.1578e-02, PNorm = 70.6270, GNorm = 0.6660, lr_0 = 1.5504e-04
Loss = 3.6281e-02, PNorm = 70.6299, GNorm = 0.4866, lr_0 = 1.5493e-04
Loss = 3.5853e-02, PNorm = 70.6327, GNorm = 0.5068, lr_0 = 1.5483e-04
Loss = 3.6967e-02, PNorm = 70.6345, GNorm = 0.5665, lr_0 = 1.5472e-04
Loss = 4.2277e-02, PNorm = 70.6361, GNorm = 0.4686, lr_0 = 1.5462e-04
Loss = 3.5462e-02, PNorm = 70.6381, GNorm = 0.4981, lr_0 = 1.5451e-04
Loss = 3.6954e-02, PNorm = 70.6403, GNorm = 0.6401, lr_0 = 1.5440e-04
Loss = 4.0102e-02, PNorm = 70.6427, GNorm = 0.8244, lr_0 = 1.5430e-04
Loss = 3.9595e-02, PNorm = 70.6451, GNorm = 0.7145, lr_0 = 1.5419e-04
Loss = 4.1448e-02, PNorm = 70.6472, GNorm = 0.5491, lr_0 = 1.5409e-04
Loss = 3.4129e-02, PNorm = 70.6495, GNorm = 0.4347, lr_0 = 1.5398e-04
Loss = 3.3093e-02, PNorm = 70.6525, GNorm = 0.5496, lr_0 = 1.5388e-04
Loss = 4.0831e-02, PNorm = 70.6557, GNorm = 0.5220, lr_0 = 1.5377e-04
Loss = 3.6060e-02, PNorm = 70.6570, GNorm = 0.5440, lr_0 = 1.5367e-04
Loss = 3.7306e-02, PNorm = 70.6597, GNorm = 0.6528, lr_0 = 1.5356e-04
Loss = 3.7550e-02, PNorm = 70.6623, GNorm = 0.3451, lr_0 = 1.5346e-04
Loss = 3.9128e-02, PNorm = 70.6638, GNorm = 0.4969, lr_0 = 1.5335e-04
Loss = 3.9590e-02, PNorm = 70.6663, GNorm = 0.5017, lr_0 = 1.5325e-04
Loss = 3.7573e-02, PNorm = 70.6685, GNorm = 0.3688, lr_0 = 1.5314e-04
Loss = 3.4783e-02, PNorm = 70.6714, GNorm = 0.4113, lr_0 = 1.5304e-04
Loss = 3.9448e-02, PNorm = 70.6753, GNorm = 0.7171, lr_0 = 1.5293e-04
Loss = 3.5971e-02, PNorm = 70.6758, GNorm = 0.4296, lr_0 = 1.5283e-04
Loss = 3.9268e-02, PNorm = 70.6782, GNorm = 0.4262, lr_0 = 1.5272e-04
Loss = 3.5491e-02, PNorm = 70.6818, GNorm = 0.6119, lr_0 = 1.5262e-04
Loss = 4.0585e-02, PNorm = 70.6834, GNorm = 0.4429, lr_0 = 1.5251e-04
Loss = 4.0103e-02, PNorm = 70.6852, GNorm = 0.5399, lr_0 = 1.5241e-04
Loss = 3.6796e-02, PNorm = 70.6871, GNorm = 0.4274, lr_0 = 1.5230e-04
Loss = 4.3156e-02, PNorm = 70.6897, GNorm = 0.4549, lr_0 = 1.5220e-04
Loss = 4.1363e-02, PNorm = 70.6906, GNorm = 0.7746, lr_0 = 1.5209e-04
Loss = 4.0349e-02, PNorm = 70.6913, GNorm = 0.6505, lr_0 = 1.5199e-04
Loss = 4.2736e-02, PNorm = 70.6941, GNorm = 0.7033, lr_0 = 1.5189e-04
Loss = 3.8215e-02, PNorm = 70.6966, GNorm = 0.4532, lr_0 = 1.5178e-04
Loss = 3.4877e-02, PNorm = 70.6981, GNorm = 0.6244, lr_0 = 1.5168e-04
Loss = 3.8257e-02, PNorm = 70.7004, GNorm = 0.3989, lr_0 = 1.5157e-04
Loss = 4.5126e-02, PNorm = 70.7034, GNorm = 0.5569, lr_0 = 1.5147e-04
Loss = 4.1895e-02, PNorm = 70.7064, GNorm = 0.7538, lr_0 = 1.5137e-04
Loss = 4.2245e-02, PNorm = 70.7081, GNorm = 0.4521, lr_0 = 1.5126e-04
Loss = 3.9956e-02, PNorm = 70.7101, GNorm = 0.4927, lr_0 = 1.5116e-04
Loss = 3.5726e-02, PNorm = 70.7122, GNorm = 0.5914, lr_0 = 1.5106e-04
Loss = 3.9205e-02, PNorm = 70.7135, GNorm = 0.6707, lr_0 = 1.5095e-04
Loss = 3.8528e-02, PNorm = 70.7148, GNorm = 0.3497, lr_0 = 1.5085e-04
Validation mae = 0.386195
Epoch 25
Loss = 3.8931e-02, PNorm = 70.7176, GNorm = 0.3425, lr_0 = 1.5075e-04
Loss = 3.4434e-02, PNorm = 70.7201, GNorm = 0.5930, lr_0 = 1.5064e-04
Loss = 3.5420e-02, PNorm = 70.7240, GNorm = 0.4060, lr_0 = 1.5054e-04
Loss = 3.3611e-02, PNorm = 70.7272, GNorm = 0.4817, lr_0 = 1.5044e-04
Loss = 3.0936e-02, PNorm = 70.7288, GNorm = 0.4243, lr_0 = 1.5033e-04
Loss = 2.8363e-02, PNorm = 70.7316, GNorm = 0.3391, lr_0 = 1.5023e-04
Loss = 3.4478e-02, PNorm = 70.7330, GNorm = 0.6469, lr_0 = 1.5013e-04
Loss = 3.2882e-02, PNorm = 70.7350, GNorm = 0.3945, lr_0 = 1.5002e-04
Loss = 3.3027e-02, PNorm = 70.7367, GNorm = 0.4538, lr_0 = 1.4992e-04
Loss = 3.7653e-02, PNorm = 70.7396, GNorm = 0.4069, lr_0 = 1.4982e-04
Loss = 3.2887e-02, PNorm = 70.7420, GNorm = 0.4872, lr_0 = 1.4972e-04
Loss = 3.1564e-02, PNorm = 70.7439, GNorm = 0.5137, lr_0 = 1.4961e-04
Loss = 3.6247e-02, PNorm = 70.7446, GNorm = 0.5461, lr_0 = 1.4951e-04
Loss = 3.1842e-02, PNorm = 70.7461, GNorm = 0.6341, lr_0 = 1.4941e-04
Loss = 3.3801e-02, PNorm = 70.7474, GNorm = 0.6330, lr_0 = 1.4931e-04
Loss = 3.3850e-02, PNorm = 70.7494, GNorm = 0.4808, lr_0 = 1.4920e-04
Loss = 3.0710e-02, PNorm = 70.7533, GNorm = 0.3461, lr_0 = 1.4910e-04
Loss = 3.9540e-02, PNorm = 70.7566, GNorm = 0.5062, lr_0 = 1.4900e-04
Loss = 2.9648e-02, PNorm = 70.7598, GNorm = 0.4625, lr_0 = 1.4890e-04
Loss = 3.6430e-02, PNorm = 70.7628, GNorm = 0.5392, lr_0 = 1.4880e-04
Loss = 3.0226e-02, PNorm = 70.7650, GNorm = 0.4194, lr_0 = 1.4869e-04
Loss = 3.0498e-02, PNorm = 70.7664, GNorm = 0.4145, lr_0 = 1.4859e-04
Loss = 3.5968e-02, PNorm = 70.7682, GNorm = 0.5341, lr_0 = 1.4849e-04
Loss = 3.0021e-02, PNorm = 70.7706, GNorm = 1.0068, lr_0 = 1.4839e-04
Loss = 3.5525e-02, PNorm = 70.7739, GNorm = 0.4527, lr_0 = 1.4829e-04
Loss = 3.6199e-02, PNorm = 70.7781, GNorm = 0.3961, lr_0 = 1.4818e-04
Loss = 3.1384e-02, PNorm = 70.7807, GNorm = 0.5861, lr_0 = 1.4808e-04
Loss = 3.1820e-02, PNorm = 70.7827, GNorm = 0.4673, lr_0 = 1.4798e-04
Loss = 3.7575e-02, PNorm = 70.7834, GNorm = 0.4625, lr_0 = 1.4788e-04
Loss = 3.3882e-02, PNorm = 70.7849, GNorm = 0.5190, lr_0 = 1.4778e-04
Loss = 2.7594e-02, PNorm = 70.7874, GNorm = 0.5315, lr_0 = 1.4768e-04
Loss = 3.7339e-02, PNorm = 70.7893, GNorm = 0.4433, lr_0 = 1.4758e-04
Loss = 3.7411e-02, PNorm = 70.7905, GNorm = 0.5379, lr_0 = 1.4748e-04
Loss = 3.4892e-02, PNorm = 70.7924, GNorm = 0.5394, lr_0 = 1.4737e-04
Loss = 3.6792e-02, PNorm = 70.7950, GNorm = 0.4493, lr_0 = 1.4727e-04
Loss = 3.0151e-02, PNorm = 70.7970, GNorm = 0.7196, lr_0 = 1.4717e-04
Loss = 3.5608e-02, PNorm = 70.7985, GNorm = 0.4058, lr_0 = 1.4707e-04
Loss = 3.0959e-02, PNorm = 70.7998, GNorm = 0.4393, lr_0 = 1.4697e-04
Loss = 3.0899e-02, PNorm = 70.8022, GNorm = 0.6108, lr_0 = 1.4687e-04
Loss = 2.9696e-02, PNorm = 70.8048, GNorm = 0.4880, lr_0 = 1.4677e-04
Loss = 3.3282e-02, PNorm = 70.8074, GNorm = 0.4478, lr_0 = 1.4667e-04
Loss = 3.7175e-02, PNorm = 70.8098, GNorm = 0.5242, lr_0 = 1.4657e-04
Loss = 3.3575e-02, PNorm = 70.8121, GNorm = 0.3592, lr_0 = 1.4647e-04
Loss = 4.0359e-02, PNorm = 70.8141, GNorm = 0.5626, lr_0 = 1.4637e-04
Loss = 3.4687e-02, PNorm = 70.8153, GNorm = 0.3982, lr_0 = 1.4627e-04
Loss = 3.6794e-02, PNorm = 70.8180, GNorm = 0.3215, lr_0 = 1.4617e-04
Loss = 3.6242e-02, PNorm = 70.8211, GNorm = 0.5404, lr_0 = 1.4607e-04
Loss = 2.8712e-02, PNorm = 70.8242, GNorm = 0.5441, lr_0 = 1.4597e-04
Loss = 3.6864e-02, PNorm = 70.8272, GNorm = 0.5345, lr_0 = 1.4587e-04
Loss = 3.4562e-02, PNorm = 70.8295, GNorm = 0.4783, lr_0 = 1.4577e-04
Loss = 3.3468e-02, PNorm = 70.8319, GNorm = 0.4577, lr_0 = 1.4567e-04
Loss = 3.4557e-02, PNorm = 70.8346, GNorm = 0.6403, lr_0 = 1.4557e-04
Loss = 3.0472e-02, PNorm = 70.8367, GNorm = 0.5282, lr_0 = 1.4547e-04
Loss = 3.4907e-02, PNorm = 70.8384, GNorm = 0.4437, lr_0 = 1.4537e-04
Loss = 3.6048e-02, PNorm = 70.8410, GNorm = 0.7251, lr_0 = 1.4527e-04
Loss = 3.2184e-02, PNorm = 70.8444, GNorm = 0.4276, lr_0 = 1.4517e-04
Loss = 4.1073e-02, PNorm = 70.8468, GNorm = 1.0129, lr_0 = 1.4507e-04
Loss = 3.4043e-02, PNorm = 70.8481, GNorm = 0.4513, lr_0 = 1.4497e-04
Loss = 4.1616e-02, PNorm = 70.8508, GNorm = 0.5085, lr_0 = 1.4487e-04
Loss = 3.4115e-02, PNorm = 70.8537, GNorm = 0.6814, lr_0 = 1.4477e-04
Loss = 3.2770e-02, PNorm = 70.8574, GNorm = 0.6001, lr_0 = 1.4467e-04
Loss = 4.2777e-02, PNorm = 70.8598, GNorm = 0.4733, lr_0 = 1.4457e-04
Loss = 3.6487e-02, PNorm = 70.8610, GNorm = 0.4145, lr_0 = 1.4447e-04
Loss = 3.5719e-02, PNorm = 70.8632, GNorm = 0.3929, lr_0 = 1.4438e-04
Loss = 3.5687e-02, PNorm = 70.8659, GNorm = 0.6093, lr_0 = 1.4428e-04
Loss = 3.1751e-02, PNorm = 70.8677, GNorm = 0.3787, lr_0 = 1.4418e-04
Loss = 3.6296e-02, PNorm = 70.8688, GNorm = 0.5798, lr_0 = 1.4408e-04
Loss = 3.3588e-02, PNorm = 70.8698, GNorm = 0.5050, lr_0 = 1.4398e-04
Loss = 3.7824e-02, PNorm = 70.8725, GNorm = 0.4157, lr_0 = 1.4388e-04
Loss = 3.4389e-02, PNorm = 70.8745, GNorm = 0.4822, lr_0 = 1.4378e-04
Loss = 3.5983e-02, PNorm = 70.8757, GNorm = 0.5964, lr_0 = 1.4368e-04
Loss = 3.0829e-02, PNorm = 70.8766, GNorm = 0.4827, lr_0 = 1.4359e-04
Loss = 3.6510e-02, PNorm = 70.8779, GNorm = 0.5129, lr_0 = 1.4349e-04
Loss = 3.2777e-02, PNorm = 70.8784, GNorm = 0.6280, lr_0 = 1.4339e-04
Loss = 3.3430e-02, PNorm = 70.8809, GNorm = 0.5235, lr_0 = 1.4329e-04
Loss = 3.4898e-02, PNorm = 70.8831, GNorm = 0.4424, lr_0 = 1.4319e-04
Loss = 4.1978e-02, PNorm = 70.8850, GNorm = 0.5841, lr_0 = 1.4310e-04
Loss = 4.1101e-02, PNorm = 70.8867, GNorm = 0.4355, lr_0 = 1.4300e-04
Loss = 3.9142e-02, PNorm = 70.8902, GNorm = 0.5830, lr_0 = 1.4290e-04
Loss = 3.8766e-02, PNorm = 70.8926, GNorm = 0.4961, lr_0 = 1.4280e-04
Loss = 3.0667e-02, PNorm = 70.8955, GNorm = 0.3848, lr_0 = 1.4270e-04
Loss = 3.3473e-02, PNorm = 70.8972, GNorm = 0.7520, lr_0 = 1.4261e-04
Loss = 3.5071e-02, PNorm = 70.8989, GNorm = 0.5144, lr_0 = 1.4251e-04
Loss = 3.3623e-02, PNorm = 70.9009, GNorm = 0.4334, lr_0 = 1.4241e-04
Loss = 3.5612e-02, PNorm = 70.9028, GNorm = 0.4600, lr_0 = 1.4231e-04
Loss = 3.9208e-02, PNorm = 70.9061, GNorm = 0.5916, lr_0 = 1.4222e-04
Loss = 3.8498e-02, PNorm = 70.9076, GNorm = 0.5226, lr_0 = 1.4212e-04
Loss = 3.5289e-02, PNorm = 70.9084, GNorm = 0.4598, lr_0 = 1.4202e-04
Loss = 3.8361e-02, PNorm = 70.9087, GNorm = 0.4189, lr_0 = 1.4192e-04
Loss = 3.5726e-02, PNorm = 70.9098, GNorm = 0.4805, lr_0 = 1.4183e-04
Loss = 3.1106e-02, PNorm = 70.9121, GNorm = 0.3613, lr_0 = 1.4173e-04
Loss = 4.1672e-02, PNorm = 70.9143, GNorm = 0.4066, lr_0 = 1.4163e-04
Loss = 3.6494e-02, PNorm = 70.9170, GNorm = 0.3128, lr_0 = 1.4153e-04
Loss = 3.8966e-02, PNorm = 70.9193, GNorm = 0.4427, lr_0 = 1.4144e-04
Loss = 4.0711e-02, PNorm = 70.9209, GNorm = 0.6788, lr_0 = 1.4134e-04
Loss = 3.2680e-02, PNorm = 70.9230, GNorm = 0.4611, lr_0 = 1.4124e-04
Loss = 3.4585e-02, PNorm = 70.9244, GNorm = 0.5876, lr_0 = 1.4115e-04
Loss = 3.3326e-02, PNorm = 70.9262, GNorm = 0.6935, lr_0 = 1.4105e-04
Loss = 3.4309e-02, PNorm = 70.9283, GNorm = 0.4299, lr_0 = 1.4095e-04
Loss = 3.5183e-02, PNorm = 70.9307, GNorm = 0.4564, lr_0 = 1.4086e-04
Loss = 3.9045e-02, PNorm = 70.9327, GNorm = 0.6182, lr_0 = 1.4076e-04
Loss = 3.3680e-02, PNorm = 70.9344, GNorm = 0.4849, lr_0 = 1.4066e-04
Loss = 4.2006e-02, PNorm = 70.9370, GNorm = 0.3471, lr_0 = 1.4057e-04
Loss = 3.7493e-02, PNorm = 70.9380, GNorm = 0.7206, lr_0 = 1.4047e-04
Loss = 4.3908e-02, PNorm = 70.9386, GNorm = 0.6551, lr_0 = 1.4038e-04
Loss = 4.0958e-02, PNorm = 70.9420, GNorm = 0.5633, lr_0 = 1.4028e-04
Loss = 3.5300e-02, PNorm = 70.9441, GNorm = 0.4862, lr_0 = 1.4018e-04
Loss = 3.7969e-02, PNorm = 70.9463, GNorm = 0.5046, lr_0 = 1.4009e-04
Loss = 3.7742e-02, PNorm = 70.9489, GNorm = 0.4735, lr_0 = 1.3999e-04
Loss = 3.3649e-02, PNorm = 70.9513, GNorm = 0.5679, lr_0 = 1.3990e-04
Loss = 3.3180e-02, PNorm = 70.9539, GNorm = 0.4801, lr_0 = 1.3980e-04
Loss = 4.5234e-02, PNorm = 70.9555, GNorm = 0.5099, lr_0 = 1.3970e-04
Loss = 4.1615e-02, PNorm = 70.9564, GNorm = 0.4859, lr_0 = 1.3961e-04
Loss = 3.5275e-02, PNorm = 70.9569, GNorm = 0.5075, lr_0 = 1.3951e-04
Loss = 3.6938e-02, PNorm = 70.9581, GNorm = 0.5642, lr_0 = 1.3942e-04
Loss = 3.4292e-02, PNorm = 70.9599, GNorm = 0.4049, lr_0 = 1.3932e-04
Loss = 3.7355e-02, PNorm = 70.9610, GNorm = 0.4436, lr_0 = 1.3923e-04
Loss = 3.5649e-02, PNorm = 70.9623, GNorm = 0.6042, lr_0 = 1.3913e-04
Loss = 3.8319e-02, PNorm = 70.9632, GNorm = 0.7086, lr_0 = 1.3904e-04
Loss = 3.4426e-02, PNorm = 70.9649, GNorm = 0.6012, lr_0 = 1.3894e-04
Validation mae = 0.388740
Epoch 26
Loss = 3.0975e-02, PNorm = 70.9671, GNorm = 0.4196, lr_0 = 1.3884e-04
Loss = 3.2029e-02, PNorm = 70.9692, GNorm = 0.3852, lr_0 = 1.3875e-04
Loss = 2.8039e-02, PNorm = 70.9709, GNorm = 0.4213, lr_0 = 1.3865e-04
Loss = 3.2921e-02, PNorm = 70.9730, GNorm = 0.3337, lr_0 = 1.3856e-04
Loss = 3.1783e-02, PNorm = 70.9756, GNorm = 0.4221, lr_0 = 1.3846e-04
Loss = 3.4292e-02, PNorm = 70.9777, GNorm = 0.5769, lr_0 = 1.3837e-04
Loss = 3.0521e-02, PNorm = 70.9799, GNorm = 0.4720, lr_0 = 1.3828e-04
Loss = 3.0636e-02, PNorm = 70.9812, GNorm = 0.6769, lr_0 = 1.3818e-04
Loss = 3.1819e-02, PNorm = 70.9833, GNorm = 0.4322, lr_0 = 1.3809e-04
Loss = 2.7079e-02, PNorm = 70.9850, GNorm = 0.4881, lr_0 = 1.3799e-04
Loss = 3.4419e-02, PNorm = 70.9872, GNorm = 0.4390, lr_0 = 1.3790e-04
Loss = 3.3145e-02, PNorm = 70.9893, GNorm = 0.4230, lr_0 = 1.3780e-04
Loss = 3.0707e-02, PNorm = 70.9925, GNorm = 0.4325, lr_0 = 1.3771e-04
Loss = 3.1073e-02, PNorm = 70.9951, GNorm = 0.4757, lr_0 = 1.3761e-04
Loss = 2.8681e-02, PNorm = 70.9971, GNorm = 0.4708, lr_0 = 1.3752e-04
Loss = 3.2780e-02, PNorm = 70.9993, GNorm = 0.3482, lr_0 = 1.3742e-04
Loss = 3.0386e-02, PNorm = 71.0016, GNorm = 0.4827, lr_0 = 1.3733e-04
Loss = 3.4286e-02, PNorm = 71.0041, GNorm = 0.4860, lr_0 = 1.3724e-04
Loss = 3.2834e-02, PNorm = 71.0074, GNorm = 0.4634, lr_0 = 1.3714e-04
Loss = 3.0796e-02, PNorm = 71.0092, GNorm = 0.3859, lr_0 = 1.3705e-04
Loss = 2.4446e-02, PNorm = 71.0099, GNorm = 0.4358, lr_0 = 1.3695e-04
Loss = 3.3235e-02, PNorm = 71.0112, GNorm = 0.4304, lr_0 = 1.3686e-04
Loss = 3.0397e-02, PNorm = 71.0136, GNorm = 0.5310, lr_0 = 1.3677e-04
Loss = 2.8744e-02, PNorm = 71.0172, GNorm = 0.4501, lr_0 = 1.3667e-04
Loss = 2.7928e-02, PNorm = 71.0197, GNorm = 0.4087, lr_0 = 1.3658e-04
Loss = 3.7271e-02, PNorm = 71.0209, GNorm = 0.6988, lr_0 = 1.3649e-04
Loss = 3.5604e-02, PNorm = 71.0212, GNorm = 0.5750, lr_0 = 1.3639e-04
Loss = 3.1411e-02, PNorm = 71.0222, GNorm = 0.4429, lr_0 = 1.3630e-04
Loss = 3.2193e-02, PNorm = 71.0247, GNorm = 0.5240, lr_0 = 1.3621e-04
Loss = 3.4857e-02, PNorm = 71.0274, GNorm = 0.4958, lr_0 = 1.3611e-04
Loss = 3.3474e-02, PNorm = 71.0292, GNorm = 0.5313, lr_0 = 1.3602e-04
Loss = 3.4188e-02, PNorm = 71.0319, GNorm = 0.4204, lr_0 = 1.3593e-04
Loss = 3.4336e-02, PNorm = 71.0343, GNorm = 0.5425, lr_0 = 1.3583e-04
Loss = 2.7522e-02, PNorm = 71.0360, GNorm = 0.4604, lr_0 = 1.3574e-04
Loss = 2.7679e-02, PNorm = 71.0375, GNorm = 0.6963, lr_0 = 1.3565e-04
Loss = 3.1825e-02, PNorm = 71.0393, GNorm = 0.4180, lr_0 = 1.3555e-04
Loss = 3.1665e-02, PNorm = 71.0408, GNorm = 0.4603, lr_0 = 1.3546e-04
Loss = 3.5032e-02, PNorm = 71.0430, GNorm = 0.8882, lr_0 = 1.3537e-04
Loss = 3.5033e-02, PNorm = 71.0445, GNorm = 0.3968, lr_0 = 1.3528e-04
Loss = 3.6541e-02, PNorm = 71.0468, GNorm = 0.4082, lr_0 = 1.3518e-04
Loss = 3.6132e-02, PNorm = 71.0485, GNorm = 0.4996, lr_0 = 1.3509e-04
Loss = 3.7959e-02, PNorm = 71.0490, GNorm = 0.4909, lr_0 = 1.3500e-04
Loss = 3.4594e-02, PNorm = 71.0505, GNorm = 0.5010, lr_0 = 1.3491e-04
Loss = 2.8362e-02, PNorm = 71.0526, GNorm = 0.5216, lr_0 = 1.3481e-04
Loss = 3.1059e-02, PNorm = 71.0545, GNorm = 0.5028, lr_0 = 1.3472e-04
Loss = 3.2363e-02, PNorm = 71.0571, GNorm = 0.5086, lr_0 = 1.3463e-04
Loss = 3.1919e-02, PNorm = 71.0599, GNorm = 0.5089, lr_0 = 1.3454e-04
Loss = 3.2043e-02, PNorm = 71.0618, GNorm = 0.4212, lr_0 = 1.3444e-04
Loss = 3.3959e-02, PNorm = 71.0634, GNorm = 0.3687, lr_0 = 1.3435e-04
Loss = 3.3448e-02, PNorm = 71.0650, GNorm = 0.6022, lr_0 = 1.3426e-04
Loss = 3.5494e-02, PNorm = 71.0659, GNorm = 0.4642, lr_0 = 1.3417e-04
Loss = 2.8721e-02, PNorm = 71.0669, GNorm = 0.4188, lr_0 = 1.3408e-04
Loss = 3.0306e-02, PNorm = 71.0689, GNorm = 0.4353, lr_0 = 1.3398e-04
Loss = 3.2004e-02, PNorm = 71.0703, GNorm = 0.5632, lr_0 = 1.3389e-04
Loss = 3.2765e-02, PNorm = 71.0728, GNorm = 0.4762, lr_0 = 1.3380e-04
Loss = 3.0367e-02, PNorm = 71.0739, GNorm = 0.4876, lr_0 = 1.3371e-04
Loss = 3.5292e-02, PNorm = 71.0757, GNorm = 0.5201, lr_0 = 1.3362e-04
Loss = 3.1426e-02, PNorm = 71.0775, GNorm = 0.5884, lr_0 = 1.3353e-04
Loss = 3.3746e-02, PNorm = 71.0781, GNorm = 1.1231, lr_0 = 1.3343e-04
Loss = 3.3601e-02, PNorm = 71.0802, GNorm = 0.4293, lr_0 = 1.3334e-04
Loss = 3.1847e-02, PNorm = 71.0821, GNorm = 0.5060, lr_0 = 1.3325e-04
Loss = 3.9373e-02, PNorm = 71.0839, GNorm = 0.4095, lr_0 = 1.3316e-04
Loss = 3.6701e-02, PNorm = 71.0858, GNorm = 0.4402, lr_0 = 1.3307e-04
Loss = 3.2420e-02, PNorm = 71.0877, GNorm = 0.6731, lr_0 = 1.3298e-04
Loss = 3.2941e-02, PNorm = 71.0902, GNorm = 0.3978, lr_0 = 1.3289e-04
Loss = 3.1092e-02, PNorm = 71.0925, GNorm = 0.3368, lr_0 = 1.3280e-04
Loss = 3.4077e-02, PNorm = 71.0939, GNorm = 0.3827, lr_0 = 1.3270e-04
Loss = 2.9243e-02, PNorm = 71.0951, GNorm = 0.6421, lr_0 = 1.3261e-04
Loss = 3.2968e-02, PNorm = 71.0967, GNorm = 0.5194, lr_0 = 1.3252e-04
Loss = 2.9132e-02, PNorm = 71.0974, GNorm = 0.3473, lr_0 = 1.3243e-04
Loss = 3.5301e-02, PNorm = 71.0990, GNorm = 0.4843, lr_0 = 1.3234e-04
Loss = 3.0252e-02, PNorm = 71.1003, GNorm = 0.6073, lr_0 = 1.3225e-04
Loss = 3.1769e-02, PNorm = 71.1023, GNorm = 0.3768, lr_0 = 1.3216e-04
Loss = 2.8343e-02, PNorm = 71.1034, GNorm = 0.5005, lr_0 = 1.3207e-04
Loss = 3.7754e-02, PNorm = 71.1043, GNorm = 0.5653, lr_0 = 1.3198e-04
Loss = 3.1569e-02, PNorm = 71.1058, GNorm = 0.5283, lr_0 = 1.3189e-04
Loss = 3.4507e-02, PNorm = 71.1073, GNorm = 0.4676, lr_0 = 1.3180e-04
Loss = 2.9087e-02, PNorm = 71.1094, GNorm = 0.3964, lr_0 = 1.3171e-04
Loss = 3.8386e-02, PNorm = 71.1114, GNorm = 0.4497, lr_0 = 1.3162e-04
Loss = 3.5446e-02, PNorm = 71.1138, GNorm = 0.3842, lr_0 = 1.3153e-04
Loss = 3.0816e-02, PNorm = 71.1158, GNorm = 0.4446, lr_0 = 1.3144e-04
Loss = 3.7385e-02, PNorm = 71.1172, GNorm = 0.4813, lr_0 = 1.3135e-04
Loss = 3.4734e-02, PNorm = 71.1193, GNorm = 0.3702, lr_0 = 1.3126e-04
Loss = 3.1169e-02, PNorm = 71.1217, GNorm = 0.4900, lr_0 = 1.3117e-04
Loss = 3.3632e-02, PNorm = 71.1235, GNorm = 0.5104, lr_0 = 1.3108e-04
Loss = 3.4089e-02, PNorm = 71.1257, GNorm = 0.4018, lr_0 = 1.3099e-04
Loss = 3.5980e-02, PNorm = 71.1291, GNorm = 0.5352, lr_0 = 1.3090e-04
Loss = 3.3853e-02, PNorm = 71.1316, GNorm = 0.4009, lr_0 = 1.3081e-04
Loss = 3.3053e-02, PNorm = 71.1327, GNorm = 0.6127, lr_0 = 1.3072e-04
Loss = 3.5346e-02, PNorm = 71.1352, GNorm = 0.3994, lr_0 = 1.3063e-04
Loss = 3.4738e-02, PNorm = 71.1374, GNorm = 0.4126, lr_0 = 1.3054e-04
Loss = 3.6475e-02, PNorm = 71.1396, GNorm = 0.4530, lr_0 = 1.3045e-04
Loss = 3.2192e-02, PNorm = 71.1418, GNorm = 0.4192, lr_0 = 1.3036e-04
Loss = 4.1651e-02, PNorm = 71.1425, GNorm = 0.5158, lr_0 = 1.3027e-04
Loss = 3.5018e-02, PNorm = 71.1435, GNorm = 0.4861, lr_0 = 1.3018e-04
Loss = 3.9722e-02, PNorm = 71.1441, GNorm = 0.7267, lr_0 = 1.3009e-04
Loss = 3.7456e-02, PNorm = 71.1463, GNorm = 0.6097, lr_0 = 1.3000e-04
Loss = 3.7319e-02, PNorm = 71.1485, GNorm = 0.7632, lr_0 = 1.2992e-04
Loss = 3.6026e-02, PNorm = 71.1509, GNorm = 0.5105, lr_0 = 1.2983e-04
Loss = 3.4075e-02, PNorm = 71.1525, GNorm = 0.5270, lr_0 = 1.2974e-04
Loss = 3.6728e-02, PNorm = 71.1542, GNorm = 0.4842, lr_0 = 1.2965e-04
Loss = 3.2487e-02, PNorm = 71.1566, GNorm = 0.3958, lr_0 = 1.2956e-04
Loss = 4.8920e-02, PNorm = 71.1593, GNorm = 0.6428, lr_0 = 1.2947e-04
Loss = 3.4415e-02, PNorm = 71.1630, GNorm = 0.3737, lr_0 = 1.2938e-04
Loss = 3.4394e-02, PNorm = 71.1652, GNorm = 0.3659, lr_0 = 1.2929e-04
Loss = 3.4178e-02, PNorm = 71.1669, GNorm = 0.4682, lr_0 = 1.2921e-04
Loss = 3.2219e-02, PNorm = 71.1697, GNorm = 0.5518, lr_0 = 1.2912e-04
Loss = 3.4361e-02, PNorm = 71.1706, GNorm = 0.4924, lr_0 = 1.2903e-04
Loss = 3.9660e-02, PNorm = 71.1719, GNorm = 0.8536, lr_0 = 1.2894e-04
Loss = 3.4771e-02, PNorm = 71.1743, GNorm = 0.7929, lr_0 = 1.2885e-04
Loss = 3.4586e-02, PNorm = 71.1753, GNorm = 0.4169, lr_0 = 1.2876e-04
Loss = 3.2425e-02, PNorm = 71.1765, GNorm = 0.5236, lr_0 = 1.2867e-04
Loss = 3.6307e-02, PNorm = 71.1782, GNorm = 0.6827, lr_0 = 1.2859e-04
Loss = 4.5480e-02, PNorm = 71.1796, GNorm = 0.9511, lr_0 = 1.2850e-04
Loss = 4.0630e-02, PNorm = 71.1804, GNorm = 0.5256, lr_0 = 1.2841e-04
Loss = 3.7758e-02, PNorm = 71.1837, GNorm = 0.4343, lr_0 = 1.2832e-04
Loss = 3.1686e-02, PNorm = 71.1864, GNorm = 0.5870, lr_0 = 1.2823e-04
Loss = 4.1382e-02, PNorm = 71.1888, GNorm = 0.5573, lr_0 = 1.2815e-04
Loss = 3.7478e-02, PNorm = 71.1911, GNorm = 0.4151, lr_0 = 1.2806e-04
Loss = 3.5745e-02, PNorm = 71.1924, GNorm = 0.3843, lr_0 = 1.2797e-04
Validation mae = 0.389215
Epoch 27
Loss = 3.0769e-02, PNorm = 71.1943, GNorm = 0.7171, lr_0 = 1.2788e-04
Loss = 3.4455e-02, PNorm = 71.1961, GNorm = 0.4422, lr_0 = 1.2780e-04
Loss = 2.9860e-02, PNorm = 71.1992, GNorm = 0.3797, lr_0 = 1.2771e-04
Loss = 2.9484e-02, PNorm = 71.2010, GNorm = 0.6652, lr_0 = 1.2762e-04
Loss = 2.7666e-02, PNorm = 71.2031, GNorm = 0.3856, lr_0 = 1.2753e-04
Loss = 3.4481e-02, PNorm = 71.2052, GNorm = 0.5144, lr_0 = 1.2745e-04
Loss = 3.2687e-02, PNorm = 71.2074, GNorm = 0.4680, lr_0 = 1.2736e-04
Loss = 2.9284e-02, PNorm = 71.2083, GNorm = 0.6163, lr_0 = 1.2727e-04
Loss = 2.7939e-02, PNorm = 71.2089, GNorm = 0.5361, lr_0 = 1.2718e-04
Loss = 2.9932e-02, PNorm = 71.2104, GNorm = 0.6521, lr_0 = 1.2710e-04
Loss = 2.9545e-02, PNorm = 71.2119, GNorm = 0.4470, lr_0 = 1.2701e-04
Loss = 2.9027e-02, PNorm = 71.2140, GNorm = 0.5017, lr_0 = 1.2692e-04
Loss = 2.5896e-02, PNorm = 71.2157, GNorm = 0.7787, lr_0 = 1.2684e-04
Loss = 3.0782e-02, PNorm = 71.2176, GNorm = 0.4470, lr_0 = 1.2675e-04
Loss = 3.3735e-02, PNorm = 71.2193, GNorm = 0.4284, lr_0 = 1.2666e-04
Loss = 3.4443e-02, PNorm = 71.2206, GNorm = 0.4135, lr_0 = 1.2658e-04
Loss = 3.2998e-02, PNorm = 71.2221, GNorm = 0.4142, lr_0 = 1.2649e-04
Loss = 2.9189e-02, PNorm = 71.2235, GNorm = 0.4329, lr_0 = 1.2640e-04
Loss = 2.7619e-02, PNorm = 71.2264, GNorm = 0.6862, lr_0 = 1.2632e-04
Loss = 2.8383e-02, PNorm = 71.2286, GNorm = 0.3449, lr_0 = 1.2623e-04
Loss = 3.2106e-02, PNorm = 71.2301, GNorm = 0.4312, lr_0 = 1.2614e-04
Loss = 3.2803e-02, PNorm = 71.2316, GNorm = 0.7185, lr_0 = 1.2606e-04
Loss = 2.8686e-02, PNorm = 71.2331, GNorm = 0.6474, lr_0 = 1.2597e-04
Loss = 2.8832e-02, PNorm = 71.2342, GNorm = 0.5201, lr_0 = 1.2588e-04
Loss = 3.1511e-02, PNorm = 71.2360, GNorm = 0.3382, lr_0 = 1.2580e-04
Loss = 3.1627e-02, PNorm = 71.2387, GNorm = 0.4633, lr_0 = 1.2571e-04
Loss = 2.8018e-02, PNorm = 71.2399, GNorm = 0.3759, lr_0 = 1.2563e-04
Loss = 3.5329e-02, PNorm = 71.2416, GNorm = 0.7397, lr_0 = 1.2554e-04
Loss = 3.0914e-02, PNorm = 71.2426, GNorm = 0.5916, lr_0 = 1.2545e-04
Loss = 3.4162e-02, PNorm = 71.2432, GNorm = 0.6336, lr_0 = 1.2537e-04
Loss = 3.2673e-02, PNorm = 71.2445, GNorm = 0.4122, lr_0 = 1.2528e-04
Loss = 3.4702e-02, PNorm = 71.2468, GNorm = 0.4935, lr_0 = 1.2520e-04
Loss = 3.0827e-02, PNorm = 71.2492, GNorm = 0.4415, lr_0 = 1.2511e-04
Loss = 2.6469e-02, PNorm = 71.2515, GNorm = 0.4298, lr_0 = 1.2502e-04
Loss = 3.3849e-02, PNorm = 71.2545, GNorm = 0.6743, lr_0 = 1.2494e-04
Loss = 3.2670e-02, PNorm = 71.2581, GNorm = 0.3672, lr_0 = 1.2485e-04
Loss = 3.5511e-02, PNorm = 71.2601, GNorm = 0.4682, lr_0 = 1.2477e-04
Loss = 3.2016e-02, PNorm = 71.2624, GNorm = 0.5093, lr_0 = 1.2468e-04
Loss = 2.8871e-02, PNorm = 71.2637, GNorm = 0.4699, lr_0 = 1.2460e-04
Loss = 3.3513e-02, PNorm = 71.2649, GNorm = 0.4143, lr_0 = 1.2451e-04
Loss = 2.8193e-02, PNorm = 71.2667, GNorm = 0.5990, lr_0 = 1.2443e-04
Loss = 3.1509e-02, PNorm = 71.2682, GNorm = 0.5972, lr_0 = 1.2434e-04
Loss = 3.1067e-02, PNorm = 71.2701, GNorm = 0.5182, lr_0 = 1.2426e-04
Loss = 2.9419e-02, PNorm = 71.2724, GNorm = 0.4073, lr_0 = 1.2417e-04
Loss = 2.9934e-02, PNorm = 71.2743, GNorm = 0.3576, lr_0 = 1.2409e-04
Loss = 3.6196e-02, PNorm = 71.2764, GNorm = 0.6131, lr_0 = 1.2400e-04
Loss = 3.0836e-02, PNorm = 71.2787, GNorm = 0.6108, lr_0 = 1.2392e-04
Loss = 2.8547e-02, PNorm = 71.2807, GNorm = 0.4180, lr_0 = 1.2383e-04
Loss = 3.4186e-02, PNorm = 71.2834, GNorm = 0.4668, lr_0 = 1.2375e-04
Loss = 2.4541e-02, PNorm = 71.2859, GNorm = 0.4668, lr_0 = 1.2366e-04
Loss = 3.5133e-02, PNorm = 71.2874, GNorm = 0.4349, lr_0 = 1.2358e-04
Loss = 3.4311e-02, PNorm = 71.2895, GNorm = 0.5091, lr_0 = 1.2349e-04
Loss = 3.6234e-02, PNorm = 71.2917, GNorm = 0.5770, lr_0 = 1.2341e-04
Loss = 3.1485e-02, PNorm = 71.2936, GNorm = 0.4228, lr_0 = 1.2332e-04
Loss = 3.4792e-02, PNorm = 71.2955, GNorm = 0.4862, lr_0 = 1.2324e-04
Loss = 3.1568e-02, PNorm = 71.2966, GNorm = 0.4115, lr_0 = 1.2315e-04
Loss = 3.6356e-02, PNorm = 71.2979, GNorm = 0.6705, lr_0 = 1.2307e-04
Loss = 3.3801e-02, PNorm = 71.2993, GNorm = 0.4185, lr_0 = 1.2298e-04
Loss = 3.4971e-02, PNorm = 71.2999, GNorm = 0.5570, lr_0 = 1.2290e-04
Loss = 2.7330e-02, PNorm = 71.3015, GNorm = 0.5633, lr_0 = 1.2282e-04
Loss = 3.1518e-02, PNorm = 71.3027, GNorm = 0.4634, lr_0 = 1.2273e-04
Loss = 2.8736e-02, PNorm = 71.3041, GNorm = 0.7648, lr_0 = 1.2265e-04
Loss = 3.1739e-02, PNorm = 71.3064, GNorm = 0.5159, lr_0 = 1.2256e-04
Loss = 3.6219e-02, PNorm = 71.3096, GNorm = 0.4804, lr_0 = 1.2248e-04
Loss = 2.8532e-02, PNorm = 71.3116, GNorm = 0.3143, lr_0 = 1.2240e-04
Loss = 2.8228e-02, PNorm = 71.3134, GNorm = 0.5813, lr_0 = 1.2231e-04
Loss = 2.7468e-02, PNorm = 71.3146, GNorm = 0.4852, lr_0 = 1.2223e-04
Loss = 3.1212e-02, PNorm = 71.3163, GNorm = 0.3817, lr_0 = 1.2214e-04
Loss = 3.2599e-02, PNorm = 71.3179, GNorm = 0.6499, lr_0 = 1.2206e-04
Loss = 3.5000e-02, PNorm = 71.3193, GNorm = 0.4202, lr_0 = 1.2198e-04
Loss = 2.9232e-02, PNorm = 71.3205, GNorm = 0.5588, lr_0 = 1.2189e-04
Loss = 2.9221e-02, PNorm = 71.3217, GNorm = 0.5732, lr_0 = 1.2181e-04
Loss = 3.3548e-02, PNorm = 71.3233, GNorm = 0.5127, lr_0 = 1.2173e-04
Loss = 3.4891e-02, PNorm = 71.3246, GNorm = 0.4225, lr_0 = 1.2164e-04
Loss = 3.8526e-02, PNorm = 71.3278, GNorm = 0.6587, lr_0 = 1.2156e-04
Loss = 2.8338e-02, PNorm = 71.3300, GNorm = 0.5299, lr_0 = 1.2148e-04
Loss = 3.4559e-02, PNorm = 71.3320, GNorm = 0.4730, lr_0 = 1.2139e-04
Loss = 3.3805e-02, PNorm = 71.3342, GNorm = 0.6817, lr_0 = 1.2131e-04
Loss = 3.3299e-02, PNorm = 71.3360, GNorm = 0.6643, lr_0 = 1.2123e-04
Loss = 2.8227e-02, PNorm = 71.3371, GNorm = 0.3934, lr_0 = 1.2114e-04
Loss = 3.1946e-02, PNorm = 71.3382, GNorm = 0.5461, lr_0 = 1.2106e-04
Loss = 3.0717e-02, PNorm = 71.3396, GNorm = 0.5419, lr_0 = 1.2098e-04
Loss = 3.2895e-02, PNorm = 71.3407, GNorm = 0.4117, lr_0 = 1.2090e-04
Loss = 3.8485e-02, PNorm = 71.3420, GNorm = 0.5298, lr_0 = 1.2081e-04
Loss = 3.1574e-02, PNorm = 71.3433, GNorm = 0.6627, lr_0 = 1.2073e-04
Loss = 3.5967e-02, PNorm = 71.3444, GNorm = 0.5448, lr_0 = 1.2065e-04
Loss = 2.9480e-02, PNorm = 71.3457, GNorm = 0.6847, lr_0 = 1.2056e-04
Loss = 3.5734e-02, PNorm = 71.3472, GNorm = 0.4343, lr_0 = 1.2048e-04
Loss = 3.8047e-02, PNorm = 71.3492, GNorm = 0.3442, lr_0 = 1.2040e-04
Loss = 2.8161e-02, PNorm = 71.3515, GNorm = 0.3294, lr_0 = 1.2032e-04
Loss = 3.0163e-02, PNorm = 71.3534, GNorm = 0.4745, lr_0 = 1.2023e-04
Loss = 3.3319e-02, PNorm = 71.3546, GNorm = 0.5985, lr_0 = 1.2015e-04
Loss = 3.7630e-02, PNorm = 71.3558, GNorm = 0.4711, lr_0 = 1.2007e-04
Loss = 3.5332e-02, PNorm = 71.3578, GNorm = 0.6158, lr_0 = 1.1999e-04
Loss = 3.4826e-02, PNorm = 71.3587, GNorm = 0.7758, lr_0 = 1.1991e-04
Loss = 3.1544e-02, PNorm = 71.3610, GNorm = 0.3633, lr_0 = 1.1982e-04
Loss = 3.0395e-02, PNorm = 71.3623, GNorm = 0.3673, lr_0 = 1.1974e-04
Loss = 3.3000e-02, PNorm = 71.3626, GNorm = 0.5222, lr_0 = 1.1966e-04
Loss = 2.8306e-02, PNorm = 71.3627, GNorm = 0.4130, lr_0 = 1.1958e-04
Loss = 2.8220e-02, PNorm = 71.3637, GNorm = 0.4535, lr_0 = 1.1950e-04
Loss = 3.1590e-02, PNorm = 71.3649, GNorm = 0.5971, lr_0 = 1.1941e-04
Loss = 3.7485e-02, PNorm = 71.3660, GNorm = 0.4056, lr_0 = 1.1933e-04
Loss = 2.8419e-02, PNorm = 71.3676, GNorm = 0.4018, lr_0 = 1.1925e-04
Loss = 2.7658e-02, PNorm = 71.3689, GNorm = 0.3665, lr_0 = 1.1917e-04
Loss = 2.7752e-02, PNorm = 71.3701, GNorm = 0.4589, lr_0 = 1.1909e-04
Loss = 3.3211e-02, PNorm = 71.3716, GNorm = 0.4572, lr_0 = 1.1901e-04
Loss = 3.8001e-02, PNorm = 71.3734, GNorm = 0.5334, lr_0 = 1.1892e-04
Loss = 3.5468e-02, PNorm = 71.3741, GNorm = 0.5459, lr_0 = 1.1884e-04
Loss = 2.9868e-02, PNorm = 71.3762, GNorm = 0.7016, lr_0 = 1.1876e-04
Loss = 3.4558e-02, PNorm = 71.3782, GNorm = 0.6826, lr_0 = 1.1868e-04
Loss = 3.3220e-02, PNorm = 71.3798, GNorm = 0.5040, lr_0 = 1.1860e-04
Loss = 3.3814e-02, PNorm = 71.3811, GNorm = 0.9119, lr_0 = 1.1852e-04
Loss = 3.0775e-02, PNorm = 71.3824, GNorm = 0.5012, lr_0 = 1.1844e-04
Loss = 3.3215e-02, PNorm = 71.3838, GNorm = 0.5993, lr_0 = 1.1835e-04
Loss = 2.8972e-02, PNorm = 71.3855, GNorm = 0.4244, lr_0 = 1.1827e-04
Loss = 3.2793e-02, PNorm = 71.3872, GNorm = 0.4115, lr_0 = 1.1819e-04
Loss = 3.7636e-02, PNorm = 71.3889, GNorm = 0.5349, lr_0 = 1.1811e-04
Loss = 3.7175e-02, PNorm = 71.3901, GNorm = 0.5716, lr_0 = 1.1803e-04
Loss = 3.0861e-02, PNorm = 71.3917, GNorm = 0.5011, lr_0 = 1.1795e-04
Loss = 3.2787e-02, PNorm = 71.3922, GNorm = 0.5298, lr_0 = 1.1787e-04
Validation mae = 0.386560
Epoch 28
Loss = 3.0558e-02, PNorm = 71.3942, GNorm = 0.4644, lr_0 = 1.1779e-04
Loss = 2.5967e-02, PNorm = 71.3954, GNorm = 0.4790, lr_0 = 1.1771e-04
Loss = 3.0168e-02, PNorm = 71.3968, GNorm = 0.3369, lr_0 = 1.1763e-04
Loss = 2.8039e-02, PNorm = 71.3982, GNorm = 0.3474, lr_0 = 1.1755e-04
Loss = 2.7339e-02, PNorm = 71.4007, GNorm = 0.4807, lr_0 = 1.1747e-04
Loss = 3.1928e-02, PNorm = 71.4029, GNorm = 0.5486, lr_0 = 1.1739e-04
Loss = 2.7074e-02, PNorm = 71.4056, GNorm = 0.4174, lr_0 = 1.1730e-04
Loss = 2.5174e-02, PNorm = 71.4082, GNorm = 0.4341, lr_0 = 1.1722e-04
Loss = 2.7263e-02, PNorm = 71.4100, GNorm = 0.6669, lr_0 = 1.1714e-04
Loss = 3.2102e-02, PNorm = 71.4121, GNorm = 0.3861, lr_0 = 1.1706e-04
Loss = 2.7839e-02, PNorm = 71.4147, GNorm = 0.4467, lr_0 = 1.1698e-04
Loss = 2.5006e-02, PNorm = 71.4164, GNorm = 0.3811, lr_0 = 1.1690e-04
Loss = 2.7146e-02, PNorm = 71.4169, GNorm = 0.4265, lr_0 = 1.1682e-04
Loss = 3.4421e-02, PNorm = 71.4185, GNorm = 0.4696, lr_0 = 1.1674e-04
Loss = 3.0142e-02, PNorm = 71.4205, GNorm = 0.4479, lr_0 = 1.1666e-04
Loss = 2.6833e-02, PNorm = 71.4215, GNorm = 0.4661, lr_0 = 1.1658e-04
Loss = 3.2027e-02, PNorm = 71.4228, GNorm = 0.5123, lr_0 = 1.1650e-04
Loss = 3.2152e-02, PNorm = 71.4249, GNorm = 0.3157, lr_0 = 1.1642e-04
Loss = 2.7373e-02, PNorm = 71.4268, GNorm = 0.4288, lr_0 = 1.1634e-04
Loss = 2.8366e-02, PNorm = 71.4285, GNorm = 0.4155, lr_0 = 1.1626e-04
Loss = 2.3695e-02, PNorm = 71.4300, GNorm = 0.4123, lr_0 = 1.1618e-04
Loss = 2.9799e-02, PNorm = 71.4304, GNorm = 0.5413, lr_0 = 1.1611e-04
Loss = 2.8965e-02, PNorm = 71.4317, GNorm = 0.3297, lr_0 = 1.1603e-04
Loss = 2.6306e-02, PNorm = 71.4330, GNorm = 0.5664, lr_0 = 1.1595e-04
Loss = 3.0355e-02, PNorm = 71.4347, GNorm = 0.4270, lr_0 = 1.1587e-04
Loss = 2.9777e-02, PNorm = 71.4363, GNorm = 0.5401, lr_0 = 1.1579e-04
Loss = 3.0401e-02, PNorm = 71.4372, GNorm = 0.4633, lr_0 = 1.1571e-04
Loss = 3.0085e-02, PNorm = 71.4384, GNorm = 0.4044, lr_0 = 1.1563e-04
Loss = 3.2028e-02, PNorm = 71.4402, GNorm = 0.4429, lr_0 = 1.1555e-04
Loss = 2.7637e-02, PNorm = 71.4423, GNorm = 0.4340, lr_0 = 1.1547e-04
Loss = 3.1272e-02, PNorm = 71.4443, GNorm = 0.4342, lr_0 = 1.1539e-04
Loss = 2.7209e-02, PNorm = 71.4468, GNorm = 0.2817, lr_0 = 1.1531e-04
Loss = 2.8625e-02, PNorm = 71.4488, GNorm = 0.3587, lr_0 = 1.1523e-04
Loss = 3.1575e-02, PNorm = 71.4506, GNorm = 0.3777, lr_0 = 1.1515e-04
Loss = 2.8532e-02, PNorm = 71.4523, GNorm = 0.4099, lr_0 = 1.1508e-04
Loss = 2.6986e-02, PNorm = 71.4542, GNorm = 0.6273, lr_0 = 1.1500e-04
Loss = 3.2778e-02, PNorm = 71.4545, GNorm = 0.3857, lr_0 = 1.1492e-04
Loss = 3.6400e-02, PNorm = 71.4559, GNorm = 0.4523, lr_0 = 1.1484e-04
Loss = 3.3842e-02, PNorm = 71.4586, GNorm = 0.5277, lr_0 = 1.1476e-04
Loss = 3.0198e-02, PNorm = 71.4600, GNorm = 0.5225, lr_0 = 1.1468e-04
Loss = 3.3455e-02, PNorm = 71.4618, GNorm = 0.4858, lr_0 = 1.1460e-04
Loss = 2.9570e-02, PNorm = 71.4626, GNorm = 0.6247, lr_0 = 1.1452e-04
Loss = 2.9486e-02, PNorm = 71.4634, GNorm = 0.4045, lr_0 = 1.1445e-04
Loss = 2.6948e-02, PNorm = 71.4649, GNorm = 0.3558, lr_0 = 1.1437e-04
Loss = 2.9830e-02, PNorm = 71.4663, GNorm = 0.3548, lr_0 = 1.1429e-04
Loss = 3.0443e-02, PNorm = 71.4675, GNorm = 0.4582, lr_0 = 1.1421e-04
Loss = 3.3700e-02, PNorm = 71.4688, GNorm = 0.4746, lr_0 = 1.1413e-04
Loss = 3.1399e-02, PNorm = 71.4695, GNorm = 0.4952, lr_0 = 1.1405e-04
Loss = 2.8020e-02, PNorm = 71.4708, GNorm = 0.3714, lr_0 = 1.1398e-04
Loss = 2.5049e-02, PNorm = 71.4725, GNorm = 0.5906, lr_0 = 1.1390e-04
Loss = 2.9582e-02, PNorm = 71.4739, GNorm = 0.4746, lr_0 = 1.1382e-04
Loss = 3.0449e-02, PNorm = 71.4755, GNorm = 0.6195, lr_0 = 1.1374e-04
Loss = 2.7929e-02, PNorm = 71.4773, GNorm = 0.3527, lr_0 = 1.1366e-04
Loss = 3.0368e-02, PNorm = 71.4784, GNorm = 0.4186, lr_0 = 1.1359e-04
Loss = 2.7705e-02, PNorm = 71.4795, GNorm = 0.6377, lr_0 = 1.1351e-04
Loss = 3.2533e-02, PNorm = 71.4818, GNorm = 0.6820, lr_0 = 1.1343e-04
Loss = 3.5262e-02, PNorm = 71.4839, GNorm = 0.4941, lr_0 = 1.1335e-04
Loss = 3.0385e-02, PNorm = 71.4850, GNorm = 0.5017, lr_0 = 1.1328e-04
Loss = 2.6704e-02, PNorm = 71.4857, GNorm = 0.4576, lr_0 = 1.1320e-04
Loss = 3.2800e-02, PNorm = 71.4866, GNorm = 0.5823, lr_0 = 1.1312e-04
Loss = 3.5518e-02, PNorm = 71.4889, GNorm = 0.4926, lr_0 = 1.1304e-04
Loss = 3.0542e-02, PNorm = 71.4910, GNorm = 0.5182, lr_0 = 1.1297e-04
Loss = 3.3886e-02, PNorm = 71.4916, GNorm = 0.3143, lr_0 = 1.1289e-04
Loss = 3.6432e-02, PNorm = 71.4931, GNorm = 0.5161, lr_0 = 1.1281e-04
Loss = 2.8731e-02, PNorm = 71.4946, GNorm = 0.3951, lr_0 = 1.1273e-04
Loss = 3.4391e-02, PNorm = 71.4958, GNorm = 0.4666, lr_0 = 1.1266e-04
Loss = 2.7157e-02, PNorm = 71.4968, GNorm = 0.4343, lr_0 = 1.1258e-04
Loss = 3.1326e-02, PNorm = 71.4988, GNorm = 0.6289, lr_0 = 1.1250e-04
Loss = 3.1506e-02, PNorm = 71.5008, GNorm = 0.4746, lr_0 = 1.1243e-04
Loss = 3.2011e-02, PNorm = 71.5029, GNorm = 0.4722, lr_0 = 1.1235e-04
Loss = 3.2181e-02, PNorm = 71.5036, GNorm = 0.3883, lr_0 = 1.1227e-04
Loss = 3.0112e-02, PNorm = 71.5042, GNorm = 0.5845, lr_0 = 1.1219e-04
Loss = 2.7707e-02, PNorm = 71.5046, GNorm = 0.3315, lr_0 = 1.1212e-04
Loss = 2.7382e-02, PNorm = 71.5052, GNorm = 0.5230, lr_0 = 1.1204e-04
Loss = 3.5172e-02, PNorm = 71.5073, GNorm = 0.4488, lr_0 = 1.1196e-04
Loss = 3.0480e-02, PNorm = 71.5084, GNorm = 0.5623, lr_0 = 1.1189e-04
Loss = 3.2311e-02, PNorm = 71.5094, GNorm = 0.4539, lr_0 = 1.1181e-04
Loss = 3.1751e-02, PNorm = 71.5110, GNorm = 0.5428, lr_0 = 1.1173e-04
Loss = 2.9661e-02, PNorm = 71.5125, GNorm = 0.4518, lr_0 = 1.1166e-04
Loss = 2.9728e-02, PNorm = 71.5140, GNorm = 0.3929, lr_0 = 1.1158e-04
Loss = 2.7093e-02, PNorm = 71.5156, GNorm = 0.5301, lr_0 = 1.1150e-04
Loss = 3.0714e-02, PNorm = 71.5176, GNorm = 0.5828, lr_0 = 1.1143e-04
Loss = 3.2716e-02, PNorm = 71.5199, GNorm = 0.6060, lr_0 = 1.1135e-04
Loss = 3.1536e-02, PNorm = 71.5226, GNorm = 0.4429, lr_0 = 1.1128e-04
Loss = 3.0848e-02, PNorm = 71.5247, GNorm = 0.5391, lr_0 = 1.1120e-04
Loss = 3.1190e-02, PNorm = 71.5264, GNorm = 0.6259, lr_0 = 1.1112e-04
Loss = 3.0405e-02, PNorm = 71.5279, GNorm = 0.4840, lr_0 = 1.1105e-04
Loss = 3.5576e-02, PNorm = 71.5292, GNorm = 0.4452, lr_0 = 1.1097e-04
Loss = 3.1599e-02, PNorm = 71.5310, GNorm = 0.6417, lr_0 = 1.1089e-04
Loss = 3.0872e-02, PNorm = 71.5322, GNorm = 0.5383, lr_0 = 1.1082e-04
Loss = 3.1997e-02, PNorm = 71.5341, GNorm = 0.6305, lr_0 = 1.1074e-04
Loss = 2.7417e-02, PNorm = 71.5347, GNorm = 0.4811, lr_0 = 1.1067e-04
Loss = 3.0122e-02, PNorm = 71.5360, GNorm = 0.4421, lr_0 = 1.1059e-04
Loss = 2.8760e-02, PNorm = 71.5378, GNorm = 0.4885, lr_0 = 1.1052e-04
Loss = 2.8585e-02, PNorm = 71.5388, GNorm = 0.4318, lr_0 = 1.1044e-04
Loss = 2.8868e-02, PNorm = 71.5394, GNorm = 0.6402, lr_0 = 1.1036e-04
Loss = 3.2875e-02, PNorm = 71.5412, GNorm = 0.5167, lr_0 = 1.1029e-04
Loss = 3.1717e-02, PNorm = 71.5428, GNorm = 0.4614, lr_0 = 1.1021e-04
Loss = 2.9551e-02, PNorm = 71.5447, GNorm = 0.5008, lr_0 = 1.1014e-04
Loss = 3.7668e-02, PNorm = 71.5463, GNorm = 0.4894, lr_0 = 1.1006e-04
Loss = 3.4472e-02, PNorm = 71.5475, GNorm = 0.3854, lr_0 = 1.0999e-04
Loss = 2.7869e-02, PNorm = 71.5483, GNorm = 0.4377, lr_0 = 1.0991e-04
Loss = 3.4135e-02, PNorm = 71.5498, GNorm = 0.5014, lr_0 = 1.0984e-04
Loss = 2.8646e-02, PNorm = 71.5518, GNorm = 0.6319, lr_0 = 1.0976e-04
Loss = 2.9397e-02, PNorm = 71.5530, GNorm = 0.4878, lr_0 = 1.0969e-04
Loss = 2.9153e-02, PNorm = 71.5541, GNorm = 0.5133, lr_0 = 1.0961e-04
Loss = 2.7228e-02, PNorm = 71.5548, GNorm = 0.5420, lr_0 = 1.0954e-04
Loss = 2.5734e-02, PNorm = 71.5553, GNorm = 0.3597, lr_0 = 1.0946e-04
Loss = 3.4108e-02, PNorm = 71.5572, GNorm = 0.4238, lr_0 = 1.0939e-04
Loss = 3.2715e-02, PNorm = 71.5592, GNorm = 0.5294, lr_0 = 1.0931e-04
Loss = 3.3566e-02, PNorm = 71.5610, GNorm = 0.4716, lr_0 = 1.0924e-04
Loss = 3.5471e-02, PNorm = 71.5631, GNorm = 0.4760, lr_0 = 1.0916e-04
Loss = 2.8411e-02, PNorm = 71.5648, GNorm = 0.5182, lr_0 = 1.0909e-04
Loss = 3.1624e-02, PNorm = 71.5665, GNorm = 0.3906, lr_0 = 1.0901e-04
Loss = 2.8636e-02, PNorm = 71.5669, GNorm = 0.3822, lr_0 = 1.0894e-04
Loss = 2.6337e-02, PNorm = 71.5674, GNorm = 0.3860, lr_0 = 1.0886e-04
Loss = 3.3094e-02, PNorm = 71.5682, GNorm = 0.3758, lr_0 = 1.0879e-04
Loss = 3.6398e-02, PNorm = 71.5697, GNorm = 0.4971, lr_0 = 1.0871e-04
Loss = 3.1565e-02, PNorm = 71.5727, GNorm = 0.5630, lr_0 = 1.0864e-04
Loss = 3.1894e-02, PNorm = 71.5749, GNorm = 0.4507, lr_0 = 1.0856e-04
Validation mae = 0.391280
Epoch 29
Loss = 2.5403e-02, PNorm = 71.5765, GNorm = 0.4577, lr_0 = 1.0849e-04
Loss = 2.7405e-02, PNorm = 71.5781, GNorm = 0.5652, lr_0 = 1.0841e-04
Loss = 2.7196e-02, PNorm = 71.5794, GNorm = 0.4367, lr_0 = 1.0834e-04
Loss = 3.0349e-02, PNorm = 71.5807, GNorm = 0.7515, lr_0 = 1.0827e-04
Loss = 2.5701e-02, PNorm = 71.5823, GNorm = 0.4229, lr_0 = 1.0819e-04
Loss = 2.9282e-02, PNorm = 71.5845, GNorm = 0.4079, lr_0 = 1.0812e-04
Loss = 2.9195e-02, PNorm = 71.5865, GNorm = 0.7334, lr_0 = 1.0804e-04
Loss = 2.8801e-02, PNorm = 71.5874, GNorm = 0.3361, lr_0 = 1.0797e-04
Loss = 2.4883e-02, PNorm = 71.5883, GNorm = 0.4659, lr_0 = 1.0790e-04
Loss = 2.4699e-02, PNorm = 71.5897, GNorm = 0.6230, lr_0 = 1.0782e-04
Loss = 2.2376e-02, PNorm = 71.5911, GNorm = 0.5101, lr_0 = 1.0775e-04
Loss = 3.1534e-02, PNorm = 71.5926, GNorm = 0.5407, lr_0 = 1.0767e-04
Loss = 2.8019e-02, PNorm = 71.5943, GNorm = 0.7876, lr_0 = 1.0760e-04
Loss = 2.7753e-02, PNorm = 71.5965, GNorm = 0.3962, lr_0 = 1.0753e-04
Loss = 3.2611e-02, PNorm = 71.5975, GNorm = 0.4158, lr_0 = 1.0745e-04
Loss = 2.8167e-02, PNorm = 71.5985, GNorm = 0.4801, lr_0 = 1.0738e-04
Loss = 2.6759e-02, PNorm = 71.6003, GNorm = 0.5182, lr_0 = 1.0731e-04
Loss = 2.9937e-02, PNorm = 71.6013, GNorm = 0.6712, lr_0 = 1.0723e-04
Loss = 2.8102e-02, PNorm = 71.6021, GNorm = 0.4763, lr_0 = 1.0716e-04
Loss = 2.7092e-02, PNorm = 71.6032, GNorm = 0.4267, lr_0 = 1.0709e-04
Loss = 2.5040e-02, PNorm = 71.6050, GNorm = 0.4179, lr_0 = 1.0701e-04
Loss = 2.7151e-02, PNorm = 71.6070, GNorm = 0.4716, lr_0 = 1.0694e-04
Loss = 3.1048e-02, PNorm = 71.6084, GNorm = 0.4792, lr_0 = 1.0687e-04
Loss = 2.6619e-02, PNorm = 71.6099, GNorm = 0.4906, lr_0 = 1.0679e-04
Loss = 2.6884e-02, PNorm = 71.6103, GNorm = 0.5108, lr_0 = 1.0672e-04
Loss = 2.9818e-02, PNorm = 71.6116, GNorm = 0.7348, lr_0 = 1.0665e-04
Loss = 2.5284e-02, PNorm = 71.6136, GNorm = 0.6071, lr_0 = 1.0657e-04
Loss = 2.9439e-02, PNorm = 71.6157, GNorm = 0.3508, lr_0 = 1.0650e-04
Loss = 3.1948e-02, PNorm = 71.6172, GNorm = 0.5604, lr_0 = 1.0643e-04
Loss = 3.0355e-02, PNorm = 71.6192, GNorm = 0.5336, lr_0 = 1.0635e-04
Loss = 2.9021e-02, PNorm = 71.6207, GNorm = 0.4146, lr_0 = 1.0628e-04
Loss = 2.6076e-02, PNorm = 71.6221, GNorm = 0.4858, lr_0 = 1.0621e-04
Loss = 2.1776e-02, PNorm = 71.6237, GNorm = 0.4541, lr_0 = 1.0614e-04
Loss = 3.0538e-02, PNorm = 71.6254, GNorm = 0.4842, lr_0 = 1.0606e-04
Loss = 2.5373e-02, PNorm = 71.6276, GNorm = 0.4045, lr_0 = 1.0599e-04
Loss = 2.6869e-02, PNorm = 71.6297, GNorm = 0.4491, lr_0 = 1.0592e-04
Loss = 2.7530e-02, PNorm = 71.6314, GNorm = 0.4081, lr_0 = 1.0585e-04
Loss = 2.8168e-02, PNorm = 71.6322, GNorm = 0.6319, lr_0 = 1.0577e-04
Loss = 2.4936e-02, PNorm = 71.6335, GNorm = 0.4339, lr_0 = 1.0570e-04
Loss = 3.0589e-02, PNorm = 71.6353, GNorm = 0.4396, lr_0 = 1.0563e-04
Loss = 3.0325e-02, PNorm = 71.6377, GNorm = 0.6488, lr_0 = 1.0556e-04
Loss = 2.8942e-02, PNorm = 71.6394, GNorm = 0.5483, lr_0 = 1.0548e-04
Loss = 2.5608e-02, PNorm = 71.6404, GNorm = 0.4900, lr_0 = 1.0541e-04
Loss = 2.9481e-02, PNorm = 71.6421, GNorm = 0.4263, lr_0 = 1.0534e-04
Loss = 2.7462e-02, PNorm = 71.6429, GNorm = 0.3528, lr_0 = 1.0527e-04
Loss = 3.3872e-02, PNorm = 71.6450, GNorm = 0.4032, lr_0 = 1.0519e-04
Loss = 2.9685e-02, PNorm = 71.6470, GNorm = 0.3749, lr_0 = 1.0512e-04
Loss = 3.4814e-02, PNorm = 71.6490, GNorm = 0.6001, lr_0 = 1.0505e-04
Loss = 2.7092e-02, PNorm = 71.6516, GNorm = 0.5003, lr_0 = 1.0498e-04
Loss = 2.9319e-02, PNorm = 71.6543, GNorm = 0.6059, lr_0 = 1.0491e-04
Loss = 2.4208e-02, PNorm = 71.6558, GNorm = 0.2526, lr_0 = 1.0483e-04
Loss = 2.6941e-02, PNorm = 71.6566, GNorm = 0.4331, lr_0 = 1.0476e-04
Loss = 2.7006e-02, PNorm = 71.6576, GNorm = 0.5438, lr_0 = 1.0469e-04
Loss = 2.7794e-02, PNorm = 71.6583, GNorm = 0.4522, lr_0 = 1.0462e-04
Loss = 2.7921e-02, PNorm = 71.6594, GNorm = 0.3534, lr_0 = 1.0455e-04
Loss = 3.2775e-02, PNorm = 71.6602, GNorm = 0.4586, lr_0 = 1.0448e-04
Loss = 2.8514e-02, PNorm = 71.6610, GNorm = 0.4421, lr_0 = 1.0440e-04
Loss = 3.1862e-02, PNorm = 71.6617, GNorm = 0.5264, lr_0 = 1.0433e-04
Loss = 2.6788e-02, PNorm = 71.6630, GNorm = 0.5207, lr_0 = 1.0426e-04
Loss = 2.6666e-02, PNorm = 71.6643, GNorm = 0.4440, lr_0 = 1.0419e-04
Loss = 3.6132e-02, PNorm = 71.6658, GNorm = 0.5299, lr_0 = 1.0412e-04
Loss = 3.2953e-02, PNorm = 71.6667, GNorm = 0.5211, lr_0 = 1.0405e-04
Loss = 3.3552e-02, PNorm = 71.6686, GNorm = 0.5542, lr_0 = 1.0398e-04
Loss = 3.0609e-02, PNorm = 71.6702, GNorm = 0.5245, lr_0 = 1.0391e-04
Loss = 2.5773e-02, PNorm = 71.6716, GNorm = 0.5094, lr_0 = 1.0383e-04
Loss = 2.9349e-02, PNorm = 71.6726, GNorm = 0.3496, lr_0 = 1.0376e-04
Loss = 3.1947e-02, PNorm = 71.6734, GNorm = 0.4981, lr_0 = 1.0369e-04
Loss = 2.6229e-02, PNorm = 71.6754, GNorm = 0.5217, lr_0 = 1.0362e-04
Loss = 3.6574e-02, PNorm = 71.6772, GNorm = 0.5358, lr_0 = 1.0355e-04
Loss = 2.8931e-02, PNorm = 71.6792, GNorm = 0.5480, lr_0 = 1.0348e-04
Loss = 2.8535e-02, PNorm = 71.6808, GNorm = 0.3693, lr_0 = 1.0341e-04
Loss = 3.0926e-02, PNorm = 71.6823, GNorm = 0.4926, lr_0 = 1.0334e-04
Loss = 2.8984e-02, PNorm = 71.6840, GNorm = 0.5601, lr_0 = 1.0327e-04
Loss = 3.0593e-02, PNorm = 71.6855, GNorm = 0.5484, lr_0 = 1.0320e-04
Loss = 3.1247e-02, PNorm = 71.6860, GNorm = 0.7244, lr_0 = 1.0312e-04
Loss = 2.9640e-02, PNorm = 71.6879, GNorm = 0.6442, lr_0 = 1.0305e-04
Loss = 2.8097e-02, PNorm = 71.6903, GNorm = 0.5758, lr_0 = 1.0298e-04
Loss = 3.0848e-02, PNorm = 71.6918, GNorm = 0.5548, lr_0 = 1.0291e-04
Loss = 2.6483e-02, PNorm = 71.6929, GNorm = 0.5576, lr_0 = 1.0284e-04
Loss = 2.3554e-02, PNorm = 71.6935, GNorm = 0.6478, lr_0 = 1.0277e-04
Loss = 3.0895e-02, PNorm = 71.6950, GNorm = 0.4498, lr_0 = 1.0270e-04
Loss = 3.2729e-02, PNorm = 71.6967, GNorm = 0.5360, lr_0 = 1.0263e-04
Loss = 3.1353e-02, PNorm = 71.6973, GNorm = 0.5343, lr_0 = 1.0256e-04
Loss = 3.2451e-02, PNorm = 71.6982, GNorm = 0.4317, lr_0 = 1.0249e-04
Loss = 2.8557e-02, PNorm = 71.6997, GNorm = 0.5442, lr_0 = 1.0242e-04
Loss = 2.9792e-02, PNorm = 71.7019, GNorm = 0.4682, lr_0 = 1.0235e-04
Loss = 2.7498e-02, PNorm = 71.7031, GNorm = 0.4278, lr_0 = 1.0228e-04
Loss = 2.8629e-02, PNorm = 71.7043, GNorm = 0.6545, lr_0 = 1.0221e-04
Loss = 3.0816e-02, PNorm = 71.7054, GNorm = 0.3651, lr_0 = 1.0214e-04
Loss = 3.4139e-02, PNorm = 71.7060, GNorm = 0.6510, lr_0 = 1.0207e-04
Loss = 2.8208e-02, PNorm = 71.7079, GNorm = 0.4145, lr_0 = 1.0200e-04
Loss = 3.1398e-02, PNorm = 71.7098, GNorm = 0.5859, lr_0 = 1.0193e-04
Loss = 2.7028e-02, PNorm = 71.7116, GNorm = 0.4346, lr_0 = 1.0186e-04
Loss = 3.0258e-02, PNorm = 71.7127, GNorm = 0.3083, lr_0 = 1.0179e-04
Loss = 2.8018e-02, PNorm = 71.7132, GNorm = 0.3974, lr_0 = 1.0172e-04
Loss = 3.1245e-02, PNorm = 71.7136, GNorm = 0.4980, lr_0 = 1.0165e-04
Loss = 2.7976e-02, PNorm = 71.7147, GNorm = 0.3631, lr_0 = 1.0158e-04
Loss = 3.0382e-02, PNorm = 71.7165, GNorm = 0.4497, lr_0 = 1.0151e-04
Loss = 3.3097e-02, PNorm = 71.7178, GNorm = 0.6378, lr_0 = 1.0144e-04
Loss = 2.7961e-02, PNorm = 71.7189, GNorm = 0.3702, lr_0 = 1.0137e-04
Loss = 3.1173e-02, PNorm = 71.7205, GNorm = 0.4189, lr_0 = 1.0130e-04
Loss = 3.1472e-02, PNorm = 71.7218, GNorm = 0.4657, lr_0 = 1.0123e-04
Loss = 3.3572e-02, PNorm = 71.7233, GNorm = 0.6087, lr_0 = 1.0116e-04
Loss = 2.6569e-02, PNorm = 71.7249, GNorm = 0.4469, lr_0 = 1.0110e-04
Loss = 2.9985e-02, PNorm = 71.7258, GNorm = 0.4074, lr_0 = 1.0103e-04
Loss = 2.9667e-02, PNorm = 71.7261, GNorm = 0.3889, lr_0 = 1.0096e-04
Loss = 2.8760e-02, PNorm = 71.7270, GNorm = 0.5422, lr_0 = 1.0089e-04
Loss = 3.0761e-02, PNorm = 71.7269, GNorm = 0.6615, lr_0 = 1.0082e-04
Loss = 3.1307e-02, PNorm = 71.7274, GNorm = 0.4223, lr_0 = 1.0075e-04
Loss = 3.3503e-02, PNorm = 71.7280, GNorm = 0.5047, lr_0 = 1.0068e-04
Loss = 2.3667e-02, PNorm = 71.7290, GNorm = 0.6517, lr_0 = 1.0061e-04
Loss = 2.9951e-02, PNorm = 71.7305, GNorm = 0.5823, lr_0 = 1.0054e-04
Loss = 3.5053e-02, PNorm = 71.7330, GNorm = 0.4177, lr_0 = 1.0047e-04
Loss = 2.6274e-02, PNorm = 71.7355, GNorm = 0.4589, lr_0 = 1.0041e-04
Loss = 3.0468e-02, PNorm = 71.7366, GNorm = 0.3870, lr_0 = 1.0034e-04
Loss = 2.9163e-02, PNorm = 71.7367, GNorm = 0.3839, lr_0 = 1.0027e-04
Loss = 2.7632e-02, PNorm = 71.7373, GNorm = 0.4455, lr_0 = 1.0020e-04
Loss = 3.0741e-02, PNorm = 71.7379, GNorm = 0.4559, lr_0 = 1.0013e-04
Loss = 3.4377e-02, PNorm = 71.7392, GNorm = 0.4326, lr_0 = 1.0006e-04
Loss = 2.9075e-02, PNorm = 71.7401, GNorm = 0.5053, lr_0 = 1.0000e-04
Validation mae = 0.389944
Model 0 best validation mae = 0.382695 on epoch 20
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.378214
Ensemble test mae = 0.378214
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 1.0492e+00, PNorm = 38.3705, GNorm = 4.2122, lr_0 = 1.0413e-04
Loss = 8.5280e-01, PNorm = 38.3723, GNorm = 3.0444, lr_0 = 1.0788e-04
Loss = 8.1103e-01, PNorm = 38.3750, GNorm = 1.5416, lr_0 = 1.1163e-04
Loss = 7.3434e-01, PNorm = 38.3786, GNorm = 1.3117, lr_0 = 1.1537e-04
Loss = 7.9511e-01, PNorm = 38.3815, GNorm = 3.7801, lr_0 = 1.1913e-04
Loss = 7.4825e-01, PNorm = 38.3839, GNorm = 5.3525, lr_0 = 1.2287e-04
Loss = 7.1424e-01, PNorm = 38.3870, GNorm = 3.8479, lr_0 = 1.2663e-04
Loss = 6.5404e-01, PNorm = 38.3910, GNorm = 1.6132, lr_0 = 1.3038e-04
Loss = 6.3651e-01, PNorm = 38.3968, GNorm = 2.2968, lr_0 = 1.3413e-04
Loss = 5.6128e-01, PNorm = 38.4038, GNorm = 3.3262, lr_0 = 1.3788e-04
Loss = 6.0272e-01, PNorm = 38.4109, GNorm = 8.3101, lr_0 = 1.4163e-04
Loss = 6.3897e-01, PNorm = 38.4174, GNorm = 4.5877, lr_0 = 1.4537e-04
Loss = 5.4994e-01, PNorm = 38.4243, GNorm = 10.7505, lr_0 = 1.4913e-04
Loss = 5.5701e-01, PNorm = 38.4303, GNorm = 10.1914, lr_0 = 1.5288e-04
Loss = 5.4628e-01, PNorm = 38.4358, GNorm = 6.6217, lr_0 = 1.5662e-04
Loss = 4.6164e-01, PNorm = 38.4423, GNorm = 3.3560, lr_0 = 1.6038e-04
Loss = 4.8821e-01, PNorm = 38.4508, GNorm = 10.0435, lr_0 = 1.6412e-04
Loss = 4.7829e-01, PNorm = 38.4569, GNorm = 21.0109, lr_0 = 1.6788e-04
Loss = 5.1341e-01, PNorm = 38.4628, GNorm = 2.9667, lr_0 = 1.7163e-04
Loss = 5.6394e-01, PNorm = 38.4674, GNorm = 7.5492, lr_0 = 1.7538e-04
Loss = 4.8007e-01, PNorm = 38.4728, GNorm = 4.6329, lr_0 = 1.7913e-04
Loss = 5.1495e-01, PNorm = 38.4778, GNorm = 6.2666, lr_0 = 1.8288e-04
Loss = 5.0225e-01, PNorm = 38.4817, GNorm = 3.8443, lr_0 = 1.8662e-04
Loss = 4.1592e-01, PNorm = 38.4879, GNorm = 1.6511, lr_0 = 1.9038e-04
Loss = 4.5392e-01, PNorm = 38.4964, GNorm = 14.2324, lr_0 = 1.9413e-04
Loss = 3.9983e-01, PNorm = 38.5035, GNorm = 4.9630, lr_0 = 1.9788e-04
Loss = 3.6855e-01, PNorm = 38.5106, GNorm = 8.5695, lr_0 = 2.0163e-04
Loss = 4.0326e-01, PNorm = 38.5155, GNorm = 17.9025, lr_0 = 2.0537e-04
Loss = 4.3941e-01, PNorm = 38.5205, GNorm = 4.1740, lr_0 = 2.0913e-04
Loss = 3.2090e-01, PNorm = 38.5275, GNorm = 2.2470, lr_0 = 2.1288e-04
Loss = 3.6890e-01, PNorm = 38.5318, GNorm = 14.6765, lr_0 = 2.1663e-04
Loss = 3.4375e-01, PNorm = 38.5383, GNorm = 4.1802, lr_0 = 2.2038e-04
Loss = 3.6779e-01, PNorm = 38.5450, GNorm = 2.4943, lr_0 = 2.2412e-04
Loss = 3.7354e-01, PNorm = 38.5507, GNorm = 20.3481, lr_0 = 2.2787e-04
Loss = 4.4893e-01, PNorm = 38.5538, GNorm = 16.6122, lr_0 = 2.3163e-04
Loss = 3.9368e-01, PNorm = 38.5597, GNorm = 2.0012, lr_0 = 2.3538e-04
Loss = 3.5582e-01, PNorm = 38.5665, GNorm = 14.4187, lr_0 = 2.3913e-04
Loss = 4.0606e-01, PNorm = 38.5742, GNorm = 4.3391, lr_0 = 2.4288e-04
Loss = 3.2655e-01, PNorm = 38.5820, GNorm = 4.1705, lr_0 = 2.4662e-04
Loss = 3.3796e-01, PNorm = 38.5908, GNorm = 17.3818, lr_0 = 2.5038e-04
Loss = 4.9689e-01, PNorm = 38.5950, GNorm = 19.2106, lr_0 = 2.5413e-04
Loss = 3.8504e-01, PNorm = 38.5994, GNorm = 1.9189, lr_0 = 2.5788e-04
Loss = 3.7699e-01, PNorm = 38.6075, GNorm = 2.5490, lr_0 = 2.6163e-04
Loss = 3.3093e-01, PNorm = 38.6143, GNorm = 10.0687, lr_0 = 2.6537e-04
Loss = 3.1400e-01, PNorm = 38.6220, GNorm = 6.2631, lr_0 = 2.6912e-04
Loss = 3.1496e-01, PNorm = 38.6293, GNorm = 2.2076, lr_0 = 2.7288e-04
Loss = 3.5607e-01, PNorm = 38.6336, GNorm = 16.9349, lr_0 = 2.7663e-04
Loss = 3.4305e-01, PNorm = 38.6383, GNorm = 5.4608, lr_0 = 2.8038e-04
Loss = 3.7987e-01, PNorm = 38.6454, GNorm = 7.6165, lr_0 = 2.8413e-04
Loss = 3.4049e-01, PNorm = 38.6517, GNorm = 7.4887, lr_0 = 2.8787e-04
Loss = 3.4016e-01, PNorm = 38.6581, GNorm = 5.1088, lr_0 = 2.9163e-04
Loss = 3.7870e-01, PNorm = 38.6627, GNorm = 16.8079, lr_0 = 2.9538e-04
Loss = 2.9818e-01, PNorm = 38.6686, GNorm = 2.0282, lr_0 = 2.9913e-04
Loss = 3.2308e-01, PNorm = 38.6746, GNorm = 19.6903, lr_0 = 3.0288e-04
Loss = 3.7093e-01, PNorm = 38.6824, GNorm = 13.7122, lr_0 = 3.0662e-04
Loss = 4.3193e-01, PNorm = 38.6870, GNorm = 4.3450, lr_0 = 3.1037e-04
Loss = 3.9100e-01, PNorm = 38.6930, GNorm = 5.0298, lr_0 = 3.1413e-04
Loss = 4.1138e-01, PNorm = 38.7019, GNorm = 4.5057, lr_0 = 3.1788e-04
Loss = 3.8555e-01, PNorm = 38.7102, GNorm = 11.5126, lr_0 = 3.2163e-04
Loss = 3.6563e-01, PNorm = 38.7182, GNorm = 4.5697, lr_0 = 3.2538e-04
Loss = 3.2974e-01, PNorm = 38.7282, GNorm = 4.6930, lr_0 = 3.2912e-04
Loss = 2.9731e-01, PNorm = 38.7335, GNorm = 5.0997, lr_0 = 3.3288e-04
Loss = 3.5857e-01, PNorm = 38.7392, GNorm = 11.1625, lr_0 = 3.3663e-04
Loss = 3.1962e-01, PNorm = 38.7475, GNorm = 10.5644, lr_0 = 3.4038e-04
Loss = 3.2803e-01, PNorm = 38.7560, GNorm = 8.8118, lr_0 = 3.4413e-04
Loss = 3.5497e-01, PNorm = 38.7650, GNorm = 6.2141, lr_0 = 3.4787e-04
Loss = 2.9359e-01, PNorm = 38.7726, GNorm = 2.3931, lr_0 = 3.5162e-04
Loss = 3.1150e-01, PNorm = 38.7781, GNorm = 5.6631, lr_0 = 3.5538e-04
Loss = 3.2919e-01, PNorm = 38.7849, GNorm = 1.9903, lr_0 = 3.5913e-04
Loss = 3.3893e-01, PNorm = 38.7913, GNorm = 14.7622, lr_0 = 3.6288e-04
Loss = 2.7743e-01, PNorm = 38.7971, GNorm = 8.9978, lr_0 = 3.6662e-04
Loss = 3.0700e-01, PNorm = 38.8027, GNorm = 5.6409, lr_0 = 3.7037e-04
Loss = 2.8892e-01, PNorm = 38.8097, GNorm = 15.1063, lr_0 = 3.7413e-04
Loss = 3.0685e-01, PNorm = 38.8160, GNorm = 10.0873, lr_0 = 3.7788e-04
Loss = 3.3652e-01, PNorm = 38.8211, GNorm = 5.0551, lr_0 = 3.8163e-04
Loss = 2.9996e-01, PNorm = 38.8288, GNorm = 5.4353, lr_0 = 3.8537e-04
Loss = 2.7683e-01, PNorm = 38.8355, GNorm = 8.8770, lr_0 = 3.8912e-04
Loss = 2.9958e-01, PNorm = 38.8461, GNorm = 11.6499, lr_0 = 3.9287e-04
Loss = 3.3185e-01, PNorm = 38.8503, GNorm = 7.5216, lr_0 = 3.9663e-04
Loss = 2.9611e-01, PNorm = 38.8590, GNorm = 1.8113, lr_0 = 4.0038e-04
Loss = 3.1553e-01, PNorm = 38.8685, GNorm = 3.6592, lr_0 = 4.0413e-04
Loss = 3.0518e-01, PNorm = 38.8783, GNorm = 6.7735, lr_0 = 4.0787e-04
Loss = 2.9804e-01, PNorm = 38.8896, GNorm = 5.1499, lr_0 = 4.1162e-04
Loss = 2.9785e-01, PNorm = 38.8973, GNorm = 6.9885, lr_0 = 4.1537e-04
Loss = 2.9248e-01, PNorm = 38.9049, GNorm = 10.8086, lr_0 = 4.1913e-04
Loss = 3.3210e-01, PNorm = 38.9129, GNorm = 9.5906, lr_0 = 4.2288e-04
Loss = 2.9377e-01, PNorm = 38.9224, GNorm = 6.6093, lr_0 = 4.2662e-04
Loss = 2.9630e-01, PNorm = 38.9296, GNorm = 2.1297, lr_0 = 4.3037e-04
Loss = 3.4039e-01, PNorm = 38.9377, GNorm = 12.4335, lr_0 = 4.3412e-04
Loss = 3.5467e-01, PNorm = 38.9460, GNorm = 0.8796, lr_0 = 4.3788e-04
Loss = 4.2662e-01, PNorm = 38.9571, GNorm = 11.5483, lr_0 = 4.4163e-04
Loss = 3.4922e-01, PNorm = 38.9689, GNorm = 2.9642, lr_0 = 4.4538e-04
Loss = 3.0517e-01, PNorm = 38.9817, GNorm = 3.4242, lr_0 = 4.4912e-04
Loss = 2.9437e-01, PNorm = 38.9948, GNorm = 2.7158, lr_0 = 4.5287e-04
Loss = 3.0397e-01, PNorm = 39.0084, GNorm = 3.9937, lr_0 = 4.5662e-04
Loss = 2.4151e-01, PNorm = 39.0200, GNorm = 2.0180, lr_0 = 4.6038e-04
Loss = 2.9832e-01, PNorm = 39.0291, GNorm = 15.1857, lr_0 = 4.6413e-04
Loss = 3.1892e-01, PNorm = 39.0364, GNorm = 1.4922, lr_0 = 4.6787e-04
Loss = 2.4435e-01, PNorm = 39.0484, GNorm = 4.4387, lr_0 = 4.7162e-04
Loss = 2.4449e-01, PNorm = 39.0588, GNorm = 1.7423, lr_0 = 4.7537e-04
Loss = 2.4777e-01, PNorm = 39.0696, GNorm = 9.4428, lr_0 = 4.7913e-04
Loss = 2.8918e-01, PNorm = 39.0805, GNorm = 1.3733, lr_0 = 4.8288e-04
Loss = 2.6650e-01, PNorm = 39.0908, GNorm = 5.1847, lr_0 = 4.8663e-04
Loss = 2.8325e-01, PNorm = 39.0965, GNorm = 6.4965, lr_0 = 4.9038e-04
Loss = 2.7512e-01, PNorm = 39.1060, GNorm = 3.3617, lr_0 = 4.9412e-04
Loss = 2.7813e-01, PNorm = 39.1204, GNorm = 6.2031, lr_0 = 4.9788e-04
Loss = 3.0029e-01, PNorm = 39.1357, GNorm = 6.3196, lr_0 = 5.0163e-04
Loss = 3.2090e-01, PNorm = 39.1433, GNorm = 3.1558, lr_0 = 5.0538e-04
Loss = 2.9874e-01, PNorm = 39.1554, GNorm = 6.1127, lr_0 = 5.0913e-04
Loss = 2.5769e-01, PNorm = 39.1676, GNorm = 1.5928, lr_0 = 5.1287e-04
Loss = 2.5612e-01, PNorm = 39.1794, GNorm = 1.4374, lr_0 = 5.1663e-04
Loss = 2.5836e-01, PNorm = 39.1879, GNorm = 1.6157, lr_0 = 5.2038e-04
Loss = 2.4269e-01, PNorm = 39.2006, GNorm = 1.7049, lr_0 = 5.2413e-04
Loss = 2.4230e-01, PNorm = 39.2120, GNorm = 1.6652, lr_0 = 5.2788e-04
Loss = 2.6851e-01, PNorm = 39.2257, GNorm = 4.2133, lr_0 = 5.3162e-04
Loss = 2.7778e-01, PNorm = 39.2356, GNorm = 3.5721, lr_0 = 5.3538e-04
Loss = 2.7967e-01, PNorm = 39.2475, GNorm = 6.1133, lr_0 = 5.3912e-04
Loss = 2.9953e-01, PNorm = 39.2585, GNorm = 5.7429, lr_0 = 5.4288e-04
Loss = 3.1336e-01, PNorm = 39.2717, GNorm = 1.9822, lr_0 = 5.4663e-04
Loss = 2.9389e-01, PNorm = 39.2867, GNorm = 2.7655, lr_0 = 5.5038e-04
Validation mae = 0.590199
Epoch 1
Loss = 2.9435e-01, PNorm = 39.2992, GNorm = 5.9209, lr_0 = 5.5413e-04
Loss = 2.9163e-01, PNorm = 39.3140, GNorm = 4.3060, lr_0 = 5.5787e-04
Loss = 2.8645e-01, PNorm = 39.3306, GNorm = 5.6196, lr_0 = 5.6163e-04
Loss = 3.0756e-01, PNorm = 39.3505, GNorm = 12.3467, lr_0 = 5.6538e-04
Loss = 2.8000e-01, PNorm = 39.3663, GNorm = 3.7387, lr_0 = 5.6913e-04
Loss = 2.7604e-01, PNorm = 39.3817, GNorm = 3.6122, lr_0 = 5.7288e-04
Loss = 2.5601e-01, PNorm = 39.3972, GNorm = 6.7731, lr_0 = 5.7662e-04
Loss = 2.3667e-01, PNorm = 39.4143, GNorm = 3.8604, lr_0 = 5.8038e-04
Loss = 3.3564e-01, PNorm = 39.4299, GNorm = 10.3460, lr_0 = 5.8413e-04
Loss = 3.6664e-01, PNorm = 39.4450, GNorm = 6.2696, lr_0 = 5.8788e-04
Loss = 3.0764e-01, PNorm = 39.4670, GNorm = 1.4877, lr_0 = 5.9163e-04
Loss = 3.0707e-01, PNorm = 39.4818, GNorm = 7.1840, lr_0 = 5.9538e-04
Loss = 2.6945e-01, PNorm = 39.4982, GNorm = 8.0392, lr_0 = 5.9913e-04
Loss = 2.3808e-01, PNorm = 39.5174, GNorm = 3.1511, lr_0 = 6.0288e-04
Loss = 2.2304e-01, PNorm = 39.5320, GNorm = 2.3884, lr_0 = 6.0663e-04
Loss = 2.2783e-01, PNorm = 39.5455, GNorm = 1.2519, lr_0 = 6.1038e-04
Loss = 2.9997e-01, PNorm = 39.5603, GNorm = 4.6283, lr_0 = 6.1413e-04
Loss = 2.5410e-01, PNorm = 39.5754, GNorm = 6.5645, lr_0 = 6.1788e-04
Loss = 2.5379e-01, PNorm = 39.5904, GNorm = 5.5013, lr_0 = 6.2163e-04
Loss = 2.8181e-01, PNorm = 39.6042, GNorm = 2.7427, lr_0 = 6.2538e-04
Loss = 2.7957e-01, PNorm = 39.6234, GNorm = 3.1262, lr_0 = 6.2913e-04
Loss = 3.0917e-01, PNorm = 39.6406, GNorm = 1.1952, lr_0 = 6.3288e-04
Loss = 2.8063e-01, PNorm = 39.6642, GNorm = 1.7527, lr_0 = 6.3663e-04
Loss = 3.1560e-01, PNorm = 39.6830, GNorm = 3.5077, lr_0 = 6.4038e-04
Loss = 2.9233e-01, PNorm = 39.7005, GNorm = 4.2328, lr_0 = 6.4413e-04
Loss = 2.3547e-01, PNorm = 39.7216, GNorm = 6.5608, lr_0 = 6.4788e-04
Loss = 2.5034e-01, PNorm = 39.7401, GNorm = 2.6570, lr_0 = 6.5163e-04
Loss = 2.5107e-01, PNorm = 39.7568, GNorm = 8.5287, lr_0 = 6.5538e-04
Loss = 2.9528e-01, PNorm = 39.7729, GNorm = 5.6184, lr_0 = 6.5913e-04
Loss = 2.8679e-01, PNorm = 39.7896, GNorm = 7.2453, lr_0 = 6.6288e-04
Loss = 2.5700e-01, PNorm = 39.8131, GNorm = 1.4586, lr_0 = 6.6663e-04
Loss = 2.5256e-01, PNorm = 39.8335, GNorm = 9.2524, lr_0 = 6.7038e-04
Loss = 2.5997e-01, PNorm = 39.8488, GNorm = 7.3307, lr_0 = 6.7413e-04
Loss = 2.2575e-01, PNorm = 39.8706, GNorm = 1.1229, lr_0 = 6.7788e-04
Loss = 2.5716e-01, PNorm = 39.8930, GNorm = 1.2801, lr_0 = 6.8163e-04
Loss = 2.9208e-01, PNorm = 39.9039, GNorm = 3.7979, lr_0 = 6.8538e-04
Loss = 2.7487e-01, PNorm = 39.9186, GNorm = 5.7799, lr_0 = 6.8913e-04
Loss = 2.6657e-01, PNorm = 39.9404, GNorm = 1.3329, lr_0 = 6.9288e-04
Loss = 2.1556e-01, PNorm = 39.9551, GNorm = 5.0945, lr_0 = 6.9663e-04
Loss = 2.4395e-01, PNorm = 39.9604, GNorm = 1.3448, lr_0 = 7.0038e-04
Loss = 2.3800e-01, PNorm = 39.9740, GNorm = 1.5412, lr_0 = 7.0413e-04
Loss = 2.2609e-01, PNorm = 39.9926, GNorm = 1.9674, lr_0 = 7.0788e-04
Loss = 2.3475e-01, PNorm = 40.0111, GNorm = 5.4469, lr_0 = 7.1163e-04
Loss = 2.6835e-01, PNorm = 40.0244, GNorm = 4.6393, lr_0 = 7.1538e-04
Loss = 2.1791e-01, PNorm = 40.0417, GNorm = 1.2564, lr_0 = 7.1913e-04
Loss = 2.0846e-01, PNorm = 40.0567, GNorm = 0.6764, lr_0 = 7.2288e-04
Loss = 2.2337e-01, PNorm = 40.0721, GNorm = 7.9417, lr_0 = 7.2663e-04
Loss = 2.8147e-01, PNorm = 40.0907, GNorm = 1.6435, lr_0 = 7.3038e-04
Loss = 3.4285e-01, PNorm = 40.1182, GNorm = 2.6163, lr_0 = 7.3413e-04
Loss = 2.5954e-01, PNorm = 40.1484, GNorm = 1.0034, lr_0 = 7.3788e-04
Loss = 2.3390e-01, PNorm = 40.1756, GNorm = 1.1117, lr_0 = 7.4163e-04
Loss = 2.3201e-01, PNorm = 40.1941, GNorm = 2.8981, lr_0 = 7.4538e-04
Loss = 2.4492e-01, PNorm = 40.2074, GNorm = 2.6144, lr_0 = 7.4913e-04
Loss = 2.2511e-01, PNorm = 40.2215, GNorm = 3.8959, lr_0 = 7.5288e-04
Loss = 2.1746e-01, PNorm = 40.2419, GNorm = 1.0523, lr_0 = 7.5663e-04
Loss = 2.1292e-01, PNorm = 40.2592, GNorm = 3.6765, lr_0 = 7.6038e-04
Loss = 2.3850e-01, PNorm = 40.2724, GNorm = 2.5985, lr_0 = 7.6413e-04
Loss = 2.2103e-01, PNorm = 40.2931, GNorm = 2.5050, lr_0 = 7.6788e-04
Loss = 2.3944e-01, PNorm = 40.3185, GNorm = 3.0227, lr_0 = 7.7163e-04
Loss = 2.8987e-01, PNorm = 40.3468, GNorm = 9.4403, lr_0 = 7.7538e-04
Loss = 3.2856e-01, PNorm = 40.3718, GNorm = 5.8097, lr_0 = 7.7913e-04
Loss = 2.6275e-01, PNorm = 40.4014, GNorm = 1.8107, lr_0 = 7.8288e-04
Loss = 2.8041e-01, PNorm = 40.4333, GNorm = 2.1056, lr_0 = 7.8663e-04
Loss = 2.4615e-01, PNorm = 40.4606, GNorm = 3.3827, lr_0 = 7.9038e-04
Loss = 2.4746e-01, PNorm = 40.4785, GNorm = 3.7321, lr_0 = 7.9413e-04
Loss = 2.6374e-01, PNorm = 40.5039, GNorm = 2.2054, lr_0 = 7.9788e-04
Loss = 1.9539e-01, PNorm = 40.5229, GNorm = 3.6758, lr_0 = 8.0163e-04
Loss = 2.6166e-01, PNorm = 40.5386, GNorm = 7.5324, lr_0 = 8.0538e-04
Loss = 2.6746e-01, PNorm = 40.5510, GNorm = 3.0257, lr_0 = 8.0913e-04
Loss = 2.3336e-01, PNorm = 40.5757, GNorm = 2.2927, lr_0 = 8.1288e-04
Loss = 2.5315e-01, PNorm = 40.5997, GNorm = 1.5730, lr_0 = 8.1663e-04
Loss = 2.4484e-01, PNorm = 40.6243, GNorm = 2.5524, lr_0 = 8.2038e-04
Loss = 2.8771e-01, PNorm = 40.6423, GNorm = 3.1308, lr_0 = 8.2413e-04
Loss = 2.3180e-01, PNorm = 40.6641, GNorm = 1.0554, lr_0 = 8.2788e-04
Loss = 2.2216e-01, PNorm = 40.6842, GNorm = 6.4826, lr_0 = 8.3163e-04
Loss = 1.9174e-01, PNorm = 40.7103, GNorm = 1.7479, lr_0 = 8.3538e-04
Loss = 2.5210e-01, PNorm = 40.7306, GNorm = 8.5086, lr_0 = 8.3913e-04
Loss = 2.6699e-01, PNorm = 40.7534, GNorm = 3.7764, lr_0 = 8.4288e-04
Loss = 2.3437e-01, PNorm = 40.7824, GNorm = 2.1810, lr_0 = 8.4663e-04
Loss = 2.3110e-01, PNorm = 40.8135, GNorm = 3.7447, lr_0 = 8.5038e-04
Loss = 2.2364e-01, PNorm = 40.8316, GNorm = 1.5548, lr_0 = 8.5413e-04
Loss = 2.1225e-01, PNorm = 40.8490, GNorm = 1.3184, lr_0 = 8.5788e-04
Loss = 2.4239e-01, PNorm = 40.8701, GNorm = 3.7927, lr_0 = 8.6163e-04
Loss = 2.0378e-01, PNorm = 40.8934, GNorm = 1.6619, lr_0 = 8.6538e-04
Loss = 2.0796e-01, PNorm = 40.9154, GNorm = 6.4431, lr_0 = 8.6913e-04
Loss = 2.5051e-01, PNorm = 40.9404, GNorm = 0.8239, lr_0 = 8.7288e-04
Loss = 2.0096e-01, PNorm = 40.9708, GNorm = 4.6180, lr_0 = 8.7663e-04
Loss = 2.3434e-01, PNorm = 40.9985, GNorm = 4.0205, lr_0 = 8.8038e-04
Loss = 2.3320e-01, PNorm = 41.0201, GNorm = 1.7288, lr_0 = 8.8413e-04
Loss = 2.8968e-01, PNorm = 41.0347, GNorm = 5.2487, lr_0 = 8.8788e-04
Loss = 2.6810e-01, PNorm = 41.0587, GNorm = 1.2951, lr_0 = 8.9163e-04
Loss = 2.4169e-01, PNorm = 41.0865, GNorm = 5.4213, lr_0 = 8.9538e-04
Loss = 2.0161e-01, PNorm = 41.1060, GNorm = 2.3883, lr_0 = 8.9913e-04
Loss = 2.1070e-01, PNorm = 41.1234, GNorm = 1.0159, lr_0 = 9.0288e-04
Loss = 2.0508e-01, PNorm = 41.1506, GNorm = 4.8562, lr_0 = 9.0663e-04
Loss = 2.2684e-01, PNorm = 41.1740, GNorm = 3.3695, lr_0 = 9.1038e-04
Loss = 2.6155e-01, PNorm = 41.1871, GNorm = 1.1422, lr_0 = 9.1413e-04
Loss = 2.2735e-01, PNorm = 41.2095, GNorm = 1.3300, lr_0 = 9.1788e-04
Loss = 2.1430e-01, PNorm = 41.2390, GNorm = 2.5112, lr_0 = 9.2163e-04
Loss = 2.4021e-01, PNorm = 41.2692, GNorm = 4.9404, lr_0 = 9.2538e-04
Loss = 2.4381e-01, PNorm = 41.2982, GNorm = 3.3371, lr_0 = 9.2913e-04
Loss = 2.4363e-01, PNorm = 41.3339, GNorm = 1.0103, lr_0 = 9.3288e-04
Loss = 2.3133e-01, PNorm = 41.3671, GNorm = 4.1961, lr_0 = 9.3663e-04
Loss = 2.5658e-01, PNorm = 41.3881, GNorm = 3.2583, lr_0 = 9.4038e-04
Loss = 2.0582e-01, PNorm = 41.4094, GNorm = 2.5412, lr_0 = 9.4413e-04
Loss = 2.1424e-01, PNorm = 41.4405, GNorm = 1.0091, lr_0 = 9.4788e-04
Loss = 2.0562e-01, PNorm = 41.4543, GNorm = 1.6364, lr_0 = 9.5163e-04
Loss = 2.4237e-01, PNorm = 41.4673, GNorm = 3.2394, lr_0 = 9.5538e-04
Loss = 2.7320e-01, PNorm = 41.4928, GNorm = 1.1088, lr_0 = 9.5913e-04
Loss = 2.0205e-01, PNorm = 41.5203, GNorm = 2.1812, lr_0 = 9.6288e-04
Loss = 2.2660e-01, PNorm = 41.5463, GNorm = 1.2149, lr_0 = 9.6663e-04
Loss = 1.9578e-01, PNorm = 41.5699, GNorm = 2.7251, lr_0 = 9.7038e-04
Loss = 1.9567e-01, PNorm = 41.5924, GNorm = 3.7277, lr_0 = 9.7413e-04
Loss = 2.0432e-01, PNorm = 41.6074, GNorm = 1.0242, lr_0 = 9.7788e-04
Loss = 2.2243e-01, PNorm = 41.6270, GNorm = 1.7065, lr_0 = 9.8163e-04
Loss = 1.9783e-01, PNorm = 41.6522, GNorm = 1.4252, lr_0 = 9.8537e-04
Loss = 2.8060e-01, PNorm = 41.6782, GNorm = 1.8974, lr_0 = 9.8912e-04
Loss = 2.5028e-01, PNorm = 41.7066, GNorm = 4.1786, lr_0 = 9.9288e-04
Loss = 2.3375e-01, PNorm = 41.7503, GNorm = 3.9066, lr_0 = 9.9663e-04
Loss = 2.2007e-01, PNorm = 41.7851, GNorm = 1.9105, lr_0 = 9.9993e-04
Validation mae = 0.590856
Epoch 2
Loss = 1.8638e-01, PNorm = 41.8134, GNorm = 2.9571, lr_0 = 9.9925e-04
Loss = 1.9762e-01, PNorm = 41.8439, GNorm = 1.8515, lr_0 = 9.9856e-04
Loss = 2.4311e-01, PNorm = 41.8661, GNorm = 3.7628, lr_0 = 9.9788e-04
Loss = 2.7447e-01, PNorm = 41.8990, GNorm = 6.0148, lr_0 = 9.9719e-04
Loss = 1.9604e-01, PNorm = 41.9419, GNorm = 2.0749, lr_0 = 9.9651e-04
Loss = 2.3490e-01, PNorm = 41.9738, GNorm = 5.4061, lr_0 = 9.9583e-04
Loss = 1.8761e-01, PNorm = 41.9992, GNorm = 2.5125, lr_0 = 9.9515e-04
Loss = 1.8154e-01, PNorm = 42.0180, GNorm = 1.0716, lr_0 = 9.9446e-04
Loss = 2.1136e-01, PNorm = 42.0350, GNorm = 5.3980, lr_0 = 9.9378e-04
Loss = 2.6700e-01, PNorm = 42.0539, GNorm = 2.9519, lr_0 = 9.9310e-04
Loss = 1.8693e-01, PNorm = 42.0855, GNorm = 0.6859, lr_0 = 9.9242e-04
Loss = 2.0998e-01, PNorm = 42.1127, GNorm = 0.9745, lr_0 = 9.9174e-04
Loss = 1.8110e-01, PNorm = 42.1412, GNorm = 3.3455, lr_0 = 9.9106e-04
Loss = 1.9578e-01, PNorm = 42.1683, GNorm = 2.3698, lr_0 = 9.9038e-04
Loss = 2.1141e-01, PNorm = 42.1974, GNorm = 1.5903, lr_0 = 9.8971e-04
Loss = 2.1369e-01, PNorm = 42.2297, GNorm = 1.1095, lr_0 = 9.8903e-04
Loss = 1.9612e-01, PNorm = 42.2551, GNorm = 4.5941, lr_0 = 9.8835e-04
Loss = 1.8923e-01, PNorm = 42.2695, GNorm = 3.3631, lr_0 = 9.8767e-04
Loss = 2.3029e-01, PNorm = 42.2977, GNorm = 1.4104, lr_0 = 9.8700e-04
Loss = 2.1143e-01, PNorm = 42.3281, GNorm = 1.6412, lr_0 = 9.8632e-04
Loss = 2.5067e-01, PNorm = 42.3582, GNorm = 1.3124, lr_0 = 9.8564e-04
Loss = 2.1986e-01, PNorm = 42.3830, GNorm = 3.4712, lr_0 = 9.8497e-04
Loss = 2.4121e-01, PNorm = 42.4130, GNorm = 1.1529, lr_0 = 9.8429e-04
Loss = 2.1733e-01, PNorm = 42.4460, GNorm = 5.0430, lr_0 = 9.8362e-04
Loss = 2.2514e-01, PNorm = 42.4822, GNorm = 2.5362, lr_0 = 9.8295e-04
Loss = 2.1014e-01, PNorm = 42.5161, GNorm = 1.8678, lr_0 = 9.8227e-04
Loss = 2.0373e-01, PNorm = 42.5466, GNorm = 4.7897, lr_0 = 9.8160e-04
Loss = 2.3971e-01, PNorm = 42.5715, GNorm = 5.2769, lr_0 = 9.8093e-04
Loss = 2.5594e-01, PNorm = 42.5996, GNorm = 1.2860, lr_0 = 9.8026e-04
Loss = 2.3657e-01, PNorm = 42.6388, GNorm = 4.0052, lr_0 = 9.7958e-04
Loss = 1.6290e-01, PNorm = 42.6715, GNorm = 2.3195, lr_0 = 9.7891e-04
Loss = 2.0716e-01, PNorm = 42.6921, GNorm = 2.0592, lr_0 = 9.7824e-04
Loss = 2.0414e-01, PNorm = 42.7031, GNorm = 1.2605, lr_0 = 9.7757e-04
Loss = 2.1897e-01, PNorm = 42.7246, GNorm = 2.0651, lr_0 = 9.7690e-04
Loss = 1.9902e-01, PNorm = 42.7499, GNorm = 0.7004, lr_0 = 9.7623e-04
Loss = 2.2807e-01, PNorm = 42.7838, GNorm = 1.3431, lr_0 = 9.7556e-04
Loss = 2.0048e-01, PNorm = 42.8273, GNorm = 1.2932, lr_0 = 9.7490e-04
Loss = 2.0396e-01, PNorm = 42.8494, GNorm = 2.9777, lr_0 = 9.7423e-04
Loss = 2.1650e-01, PNorm = 42.8673, GNorm = 4.7101, lr_0 = 9.7356e-04
Loss = 1.9002e-01, PNorm = 42.8934, GNorm = 2.3039, lr_0 = 9.7289e-04
Loss = 2.0718e-01, PNorm = 42.9182, GNorm = 2.3431, lr_0 = 9.7223e-04
Loss = 2.2178e-01, PNorm = 42.9569, GNorm = 3.0563, lr_0 = 9.7156e-04
Loss = 2.4186e-01, PNorm = 42.9778, GNorm = 2.3903, lr_0 = 9.7090e-04
Loss = 2.2248e-01, PNorm = 43.0115, GNorm = 4.0045, lr_0 = 9.7023e-04
Loss = 2.3466e-01, PNorm = 43.0513, GNorm = 2.5317, lr_0 = 9.6957e-04
Loss = 1.8776e-01, PNorm = 43.0918, GNorm = 2.1138, lr_0 = 9.6890e-04
Loss = 2.1871e-01, PNorm = 43.1196, GNorm = 2.4696, lr_0 = 9.6824e-04
Loss = 1.8970e-01, PNorm = 43.1494, GNorm = 2.5057, lr_0 = 9.6757e-04
Loss = 1.9914e-01, PNorm = 43.1785, GNorm = 0.9217, lr_0 = 9.6691e-04
Loss = 2.1210e-01, PNorm = 43.2012, GNorm = 3.5680, lr_0 = 9.6625e-04
Loss = 2.2245e-01, PNorm = 43.2302, GNorm = 2.8852, lr_0 = 9.6559e-04
Loss = 2.1622e-01, PNorm = 43.2550, GNorm = 1.4394, lr_0 = 9.6493e-04
Loss = 2.0229e-01, PNorm = 43.2819, GNorm = 1.7900, lr_0 = 9.6427e-04
Loss = 1.8793e-01, PNorm = 43.3089, GNorm = 4.5379, lr_0 = 9.6360e-04
Loss = 2.3682e-01, PNorm = 43.3357, GNorm = 3.6844, lr_0 = 9.6294e-04
Loss = 2.2962e-01, PNorm = 43.3723, GNorm = 1.5193, lr_0 = 9.6228e-04
Loss = 1.9447e-01, PNorm = 43.4034, GNorm = 4.5061, lr_0 = 9.6163e-04
Loss = 2.4975e-01, PNorm = 43.4324, GNorm = 7.0222, lr_0 = 9.6097e-04
Loss = 2.1304e-01, PNorm = 43.4795, GNorm = 0.9363, lr_0 = 9.6031e-04
Loss = 1.8657e-01, PNorm = 43.5094, GNorm = 1.1225, lr_0 = 9.5965e-04
Loss = 1.9677e-01, PNorm = 43.5237, GNorm = 0.9172, lr_0 = 9.5899e-04
Loss = 2.0036e-01, PNorm = 43.5454, GNorm = 1.4591, lr_0 = 9.5834e-04
Loss = 2.0059e-01, PNorm = 43.5683, GNorm = 3.8658, lr_0 = 9.5768e-04
Loss = 2.3341e-01, PNorm = 43.6013, GNorm = 4.6555, lr_0 = 9.5702e-04
Loss = 2.3979e-01, PNorm = 43.6405, GNorm = 3.2526, lr_0 = 9.5637e-04
Loss = 2.1291e-01, PNorm = 43.6742, GNorm = 1.0470, lr_0 = 9.5571e-04
Loss = 2.3867e-01, PNorm = 43.7039, GNorm = 3.6800, lr_0 = 9.5506e-04
Loss = 2.0767e-01, PNorm = 43.7402, GNorm = 1.9752, lr_0 = 9.5440e-04
Loss = 1.8747e-01, PNorm = 43.7677, GNorm = 1.9746, lr_0 = 9.5375e-04
Loss = 1.7563e-01, PNorm = 43.7987, GNorm = 2.1966, lr_0 = 9.5310e-04
Loss = 1.9032e-01, PNorm = 43.8167, GNorm = 2.3688, lr_0 = 9.5244e-04
Loss = 2.2694e-01, PNorm = 43.8426, GNorm = 2.9574, lr_0 = 9.5179e-04
Loss = 1.9891e-01, PNorm = 43.8630, GNorm = 1.9635, lr_0 = 9.5114e-04
Loss = 1.9732e-01, PNorm = 43.8919, GNorm = 0.9996, lr_0 = 9.5049e-04
Loss = 1.8482e-01, PNorm = 43.9174, GNorm = 3.2466, lr_0 = 9.4984e-04
Loss = 1.7872e-01, PNorm = 43.9235, GNorm = 0.6525, lr_0 = 9.4919e-04
Loss = 2.1193e-01, PNorm = 43.9436, GNorm = 2.8943, lr_0 = 9.4854e-04
Loss = 1.9284e-01, PNorm = 43.9703, GNorm = 1.3142, lr_0 = 9.4789e-04
Loss = 1.6731e-01, PNorm = 43.9980, GNorm = 2.9262, lr_0 = 9.4724e-04
Loss = 2.0439e-01, PNorm = 44.0167, GNorm = 1.0955, lr_0 = 9.4659e-04
Loss = 1.8524e-01, PNorm = 44.0382, GNorm = 2.9547, lr_0 = 9.4594e-04
Loss = 1.6673e-01, PNorm = 44.0682, GNorm = 3.0486, lr_0 = 9.4529e-04
Loss = 1.7587e-01, PNorm = 44.0929, GNorm = 1.3679, lr_0 = 9.4464e-04
Loss = 1.7242e-01, PNorm = 44.1170, GNorm = 2.5075, lr_0 = 9.4400e-04
Loss = 1.8532e-01, PNorm = 44.1390, GNorm = 4.8155, lr_0 = 9.4335e-04
Loss = 2.2575e-01, PNorm = 44.1737, GNorm = 2.0586, lr_0 = 9.4270e-04
Loss = 2.6088e-01, PNorm = 44.2074, GNorm = 3.0761, lr_0 = 9.4206e-04
Loss = 2.0705e-01, PNorm = 44.2412, GNorm = 0.8030, lr_0 = 9.4141e-04
Loss = 1.9432e-01, PNorm = 44.2696, GNorm = 3.0507, lr_0 = 9.4077e-04
Loss = 1.9611e-01, PNorm = 44.2877, GNorm = 1.3084, lr_0 = 9.4012e-04
Loss = 1.9364e-01, PNorm = 44.3052, GNorm = 1.5086, lr_0 = 9.3948e-04
Loss = 1.9239e-01, PNorm = 44.3284, GNorm = 2.8527, lr_0 = 9.3884e-04
Loss = 1.8594e-01, PNorm = 44.3517, GNorm = 0.7256, lr_0 = 9.3819e-04
Loss = 1.5538e-01, PNorm = 44.3809, GNorm = 3.1499, lr_0 = 9.3755e-04
Loss = 1.8783e-01, PNorm = 44.3975, GNorm = 1.9016, lr_0 = 9.3691e-04
Loss = 1.8720e-01, PNorm = 44.4216, GNorm = 1.3443, lr_0 = 9.3627e-04
Loss = 1.8594e-01, PNorm = 44.4468, GNorm = 0.8133, lr_0 = 9.3562e-04
Loss = 1.7301e-01, PNorm = 44.4642, GNorm = 1.3807, lr_0 = 9.3498e-04
Loss = 1.7728e-01, PNorm = 44.4871, GNorm = 0.6303, lr_0 = 9.3434e-04
Loss = 2.0194e-01, PNorm = 44.5152, GNorm = 1.4056, lr_0 = 9.3370e-04
Loss = 2.1236e-01, PNorm = 44.5377, GNorm = 2.6138, lr_0 = 9.3306e-04
Loss = 2.1879e-01, PNorm = 44.5526, GNorm = 3.0512, lr_0 = 9.3242e-04
Loss = 1.8059e-01, PNorm = 44.5758, GNorm = 0.9278, lr_0 = 9.3178e-04
Loss = 1.6954e-01, PNorm = 44.6002, GNorm = 1.2413, lr_0 = 9.3115e-04
Loss = 1.6376e-01, PNorm = 44.6278, GNorm = 2.5007, lr_0 = 9.3051e-04
Loss = 1.8067e-01, PNorm = 44.6525, GNorm = 2.1092, lr_0 = 9.2987e-04
Loss = 1.7537e-01, PNorm = 44.6808, GNorm = 3.8745, lr_0 = 9.2923e-04
Loss = 1.6848e-01, PNorm = 44.7066, GNorm = 2.1829, lr_0 = 9.2860e-04
Loss = 1.6743e-01, PNorm = 44.7283, GNorm = 2.8347, lr_0 = 9.2796e-04
Loss = 2.0235e-01, PNorm = 44.7492, GNorm = 1.8271, lr_0 = 9.2733e-04
Loss = 2.1854e-01, PNorm = 44.7865, GNorm = 2.9640, lr_0 = 9.2669e-04
Loss = 2.0558e-01, PNorm = 44.8056, GNorm = 1.1718, lr_0 = 9.2606e-04
Loss = 1.8059e-01, PNorm = 44.8303, GNorm = 3.5515, lr_0 = 9.2542e-04
Loss = 1.7793e-01, PNorm = 44.8533, GNorm = 2.8395, lr_0 = 9.2479e-04
Loss = 1.8254e-01, PNorm = 44.8779, GNorm = 2.5004, lr_0 = 9.2415e-04
Loss = 1.9910e-01, PNorm = 44.8940, GNorm = 1.1189, lr_0 = 9.2352e-04
Loss = 2.2463e-01, PNorm = 44.9178, GNorm = 7.1785, lr_0 = 9.2289e-04
Loss = 2.0049e-01, PNorm = 44.9462, GNorm = 0.7251, lr_0 = 9.2226e-04
Loss = 1.7716e-01, PNorm = 44.9758, GNorm = 1.8958, lr_0 = 9.2162e-04
Loss = 2.0857e-01, PNorm = 44.9917, GNorm = 1.2975, lr_0 = 9.2099e-04
Validation mae = 0.542933
Epoch 3
Loss = 2.5719e-01, PNorm = 45.0237, GNorm = 3.2684, lr_0 = 9.2036e-04
Loss = 2.2259e-01, PNorm = 45.0733, GNorm = 1.5601, lr_0 = 9.1973e-04
Loss = 1.7461e-01, PNorm = 45.1061, GNorm = 1.1523, lr_0 = 9.1910e-04
Loss = 1.7597e-01, PNorm = 45.1287, GNorm = 1.7132, lr_0 = 9.1847e-04
Loss = 1.8524e-01, PNorm = 45.1473, GNorm = 3.3681, lr_0 = 9.1784e-04
Loss = 1.6711e-01, PNorm = 45.1643, GNorm = 2.4083, lr_0 = 9.1721e-04
Loss = 1.6185e-01, PNorm = 45.1882, GNorm = 0.9510, lr_0 = 9.1658e-04
Loss = 2.0914e-01, PNorm = 45.2161, GNorm = 2.1484, lr_0 = 9.1596e-04
Loss = 1.6771e-01, PNorm = 45.2401, GNorm = 0.8128, lr_0 = 9.1533e-04
Loss = 1.5077e-01, PNorm = 45.2541, GNorm = 0.7395, lr_0 = 9.1470e-04
Loss = 1.7886e-01, PNorm = 45.2647, GNorm = 1.0811, lr_0 = 9.1408e-04
Loss = 1.7773e-01, PNorm = 45.2886, GNorm = 1.6455, lr_0 = 9.1345e-04
Loss = 1.5341e-01, PNorm = 45.3133, GNorm = 1.1022, lr_0 = 9.1282e-04
Loss = 1.5787e-01, PNorm = 45.3363, GNorm = 0.7033, lr_0 = 9.1220e-04
Loss = 1.6086e-01, PNorm = 45.3607, GNorm = 0.8714, lr_0 = 9.1157e-04
Loss = 1.5182e-01, PNorm = 45.3856, GNorm = 1.3525, lr_0 = 9.1095e-04
Loss = 1.8281e-01, PNorm = 45.4072, GNorm = 3.2221, lr_0 = 9.1032e-04
Loss = 1.7854e-01, PNorm = 45.4282, GNorm = 2.0200, lr_0 = 9.0970e-04
Loss = 1.5943e-01, PNorm = 45.4525, GNorm = 1.2813, lr_0 = 9.0908e-04
Loss = 1.4834e-01, PNorm = 45.4731, GNorm = 1.0130, lr_0 = 9.0846e-04
Loss = 1.5499e-01, PNorm = 45.4909, GNorm = 1.5651, lr_0 = 9.0783e-04
Loss = 1.6019e-01, PNorm = 45.5110, GNorm = 0.7805, lr_0 = 9.0721e-04
Loss = 1.9092e-01, PNorm = 45.5428, GNorm = 2.2196, lr_0 = 9.0659e-04
Loss = 1.9803e-01, PNorm = 45.5675, GNorm = 3.1385, lr_0 = 9.0597e-04
Loss = 1.6241e-01, PNorm = 45.5964, GNorm = 1.0670, lr_0 = 9.0535e-04
Loss = 1.6758e-01, PNorm = 45.6243, GNorm = 3.1465, lr_0 = 9.0473e-04
Loss = 1.8645e-01, PNorm = 45.6446, GNorm = 0.9029, lr_0 = 9.0411e-04
Loss = 1.8634e-01, PNorm = 45.6630, GNorm = 3.7721, lr_0 = 9.0349e-04
Loss = 1.6682e-01, PNorm = 45.6849, GNorm = 1.3535, lr_0 = 9.0287e-04
Loss = 1.8710e-01, PNorm = 45.7149, GNorm = 4.8783, lr_0 = 9.0225e-04
Loss = 2.0986e-01, PNorm = 45.7367, GNorm = 3.7286, lr_0 = 9.0163e-04
Loss = 1.8483e-01, PNorm = 45.7614, GNorm = 0.6738, lr_0 = 9.0102e-04
Loss = 1.8686e-01, PNorm = 45.7888, GNorm = 2.6604, lr_0 = 9.0040e-04
Loss = 1.5782e-01, PNorm = 45.8070, GNorm = 0.8217, lr_0 = 8.9978e-04
Loss = 1.5181e-01, PNorm = 45.8244, GNorm = 1.5363, lr_0 = 8.9916e-04
Loss = 1.6093e-01, PNorm = 45.8320, GNorm = 1.0525, lr_0 = 8.9855e-04
Loss = 1.8537e-01, PNorm = 45.8409, GNorm = 2.6603, lr_0 = 8.9793e-04
Loss = 1.9249e-01, PNorm = 45.8692, GNorm = 1.8962, lr_0 = 8.9732e-04
Loss = 1.7861e-01, PNorm = 45.9009, GNorm = 1.7156, lr_0 = 8.9670e-04
Loss = 1.9309e-01, PNorm = 45.9224, GNorm = 2.4220, lr_0 = 8.9609e-04
Loss = 1.8707e-01, PNorm = 45.9510, GNorm = 2.3976, lr_0 = 8.9548e-04
Loss = 1.6119e-01, PNorm = 45.9769, GNorm = 1.6441, lr_0 = 8.9486e-04
Loss = 1.9094e-01, PNorm = 46.0036, GNorm = 1.0758, lr_0 = 8.9425e-04
Loss = 1.6951e-01, PNorm = 46.0309, GNorm = 1.5473, lr_0 = 8.9364e-04
Loss = 1.7199e-01, PNorm = 46.0545, GNorm = 2.9430, lr_0 = 8.9302e-04
Loss = 1.6565e-01, PNorm = 46.0670, GNorm = 0.9668, lr_0 = 8.9241e-04
Loss = 1.8605e-01, PNorm = 46.0890, GNorm = 1.0101, lr_0 = 8.9180e-04
Loss = 1.5987e-01, PNorm = 46.1032, GNorm = 1.3997, lr_0 = 8.9119e-04
Loss = 1.6028e-01, PNorm = 46.1240, GNorm = 2.3016, lr_0 = 8.9058e-04
Loss = 1.5557e-01, PNorm = 46.1502, GNorm = 2.2098, lr_0 = 8.8997e-04
Loss = 1.6253e-01, PNorm = 46.1798, GNorm = 0.8227, lr_0 = 8.8936e-04
Loss = 1.9960e-01, PNorm = 46.2097, GNorm = 2.7866, lr_0 = 8.8875e-04
Loss = 1.7064e-01, PNorm = 46.2417, GNorm = 0.7031, lr_0 = 8.8814e-04
Loss = 1.7584e-01, PNorm = 46.2631, GNorm = 1.7831, lr_0 = 8.8753e-04
Loss = 1.9177e-01, PNorm = 46.2855, GNorm = 0.7790, lr_0 = 8.8693e-04
Loss = 1.5636e-01, PNorm = 46.3123, GNorm = 1.6619, lr_0 = 8.8632e-04
Loss = 1.4283e-01, PNorm = 46.3284, GNorm = 0.5125, lr_0 = 8.8571e-04
Loss = 1.4987e-01, PNorm = 46.3436, GNorm = 1.4454, lr_0 = 8.8510e-04
Loss = 1.6898e-01, PNorm = 46.3614, GNorm = 0.7493, lr_0 = 8.8450e-04
Loss = 1.6463e-01, PNorm = 46.3828, GNorm = 2.1108, lr_0 = 8.8389e-04
Loss = 1.6719e-01, PNorm = 46.3993, GNorm = 2.4509, lr_0 = 8.8329e-04
Loss = 1.5819e-01, PNorm = 46.4212, GNorm = 0.9084, lr_0 = 8.8268e-04
Loss = 1.7438e-01, PNorm = 46.4411, GNorm = 1.4409, lr_0 = 8.8208e-04
Loss = 2.0426e-01, PNorm = 46.4604, GNorm = 3.7737, lr_0 = 8.8147e-04
Loss = 1.7858e-01, PNorm = 46.4835, GNorm = 1.6899, lr_0 = 8.8087e-04
Loss = 1.6571e-01, PNorm = 46.5102, GNorm = 0.6070, lr_0 = 8.8026e-04
Loss = 1.5544e-01, PNorm = 46.5363, GNorm = 0.7595, lr_0 = 8.7966e-04
Loss = 1.6971e-01, PNorm = 46.5581, GNorm = 2.4324, lr_0 = 8.7906e-04
Loss = 1.5635e-01, PNorm = 46.5778, GNorm = 1.0343, lr_0 = 8.7846e-04
Loss = 1.9540e-01, PNorm = 46.5894, GNorm = 1.6788, lr_0 = 8.7785e-04
Loss = 1.7383e-01, PNorm = 46.6076, GNorm = 1.5398, lr_0 = 8.7725e-04
Loss = 1.8363e-01, PNorm = 46.6254, GNorm = 0.6846, lr_0 = 8.7665e-04
Loss = 1.4316e-01, PNorm = 46.6472, GNorm = 0.6325, lr_0 = 8.7605e-04
Loss = 1.5769e-01, PNorm = 46.6715, GNorm = 1.4340, lr_0 = 8.7545e-04
Loss = 1.5170e-01, PNorm = 46.6867, GNorm = 1.9403, lr_0 = 8.7485e-04
Loss = 1.6052e-01, PNorm = 46.7039, GNorm = 1.0631, lr_0 = 8.7425e-04
Loss = 1.6969e-01, PNorm = 46.7247, GNorm = 1.4619, lr_0 = 8.7365e-04
Loss = 1.5832e-01, PNorm = 46.7524, GNorm = 1.5947, lr_0 = 8.7306e-04
Loss = 1.5381e-01, PNorm = 46.7740, GNorm = 0.5398, lr_0 = 8.7246e-04
Loss = 1.3969e-01, PNorm = 46.7984, GNorm = 0.8772, lr_0 = 8.7186e-04
Loss = 1.8086e-01, PNorm = 46.8195, GNorm = 2.3117, lr_0 = 8.7126e-04
Loss = 1.9168e-01, PNorm = 46.8433, GNorm = 0.7365, lr_0 = 8.7067e-04
Loss = 1.8705e-01, PNorm = 46.8638, GNorm = 1.6927, lr_0 = 8.7007e-04
Loss = 1.5923e-01, PNorm = 46.8971, GNorm = 1.9690, lr_0 = 8.6947e-04
Loss = 1.5214e-01, PNorm = 46.9247, GNorm = 1.5125, lr_0 = 8.6888e-04
Loss = 1.5409e-01, PNorm = 46.9432, GNorm = 1.2531, lr_0 = 8.6828e-04
Loss = 1.7715e-01, PNorm = 46.9584, GNorm = 2.5907, lr_0 = 8.6769e-04
Loss = 1.7541e-01, PNorm = 46.9861, GNorm = 0.9863, lr_0 = 8.6709e-04
Loss = 1.4245e-01, PNorm = 47.0153, GNorm = 1.3046, lr_0 = 8.6650e-04
Loss = 1.6048e-01, PNorm = 47.0273, GNorm = 0.7626, lr_0 = 8.6590e-04
Loss = 1.6819e-01, PNorm = 47.0390, GNorm = 0.8261, lr_0 = 8.6531e-04
Loss = 1.6889e-01, PNorm = 47.0530, GNorm = 0.7374, lr_0 = 8.6472e-04
Loss = 1.6833e-01, PNorm = 47.0752, GNorm = 0.6337, lr_0 = 8.6413e-04
Loss = 1.7019e-01, PNorm = 47.0968, GNorm = 2.5434, lr_0 = 8.6353e-04
Loss = 1.4798e-01, PNorm = 47.1153, GNorm = 1.1940, lr_0 = 8.6294e-04
Loss = 1.6119e-01, PNorm = 47.1330, GNorm = 0.7097, lr_0 = 8.6235e-04
Loss = 1.9363e-01, PNorm = 47.1638, GNorm = 2.6151, lr_0 = 8.6176e-04
Loss = 2.2379e-01, PNorm = 47.1933, GNorm = 3.1970, lr_0 = 8.6117e-04
Loss = 1.6676e-01, PNorm = 47.2277, GNorm = 1.0270, lr_0 = 8.6058e-04
Loss = 1.7718e-01, PNorm = 47.2616, GNorm = 2.5383, lr_0 = 8.5999e-04
Loss = 1.9390e-01, PNorm = 47.2949, GNorm = 2.7889, lr_0 = 8.5940e-04
Loss = 1.7740e-01, PNorm = 47.3247, GNorm = 1.4315, lr_0 = 8.5881e-04
Loss = 1.6867e-01, PNorm = 47.3479, GNorm = 1.5671, lr_0 = 8.5823e-04
Loss = 1.7617e-01, PNorm = 47.3722, GNorm = 2.7501, lr_0 = 8.5764e-04
Loss = 1.8592e-01, PNorm = 47.3920, GNorm = 0.9345, lr_0 = 8.5705e-04
Loss = 1.6868e-01, PNorm = 47.4194, GNorm = 0.9920, lr_0 = 8.5646e-04
Loss = 1.6921e-01, PNorm = 47.4441, GNorm = 2.2546, lr_0 = 8.5588e-04
Loss = 1.5927e-01, PNorm = 47.4576, GNorm = 1.6598, lr_0 = 8.5529e-04
Loss = 1.8336e-01, PNorm = 47.4799, GNorm = 2.6884, lr_0 = 8.5470e-04
Loss = 1.9580e-01, PNorm = 47.5140, GNorm = 1.0469, lr_0 = 8.5412e-04
Loss = 1.6591e-01, PNorm = 47.5479, GNorm = 1.5309, lr_0 = 8.5353e-04
Loss = 1.5425e-01, PNorm = 47.5697, GNorm = 0.9137, lr_0 = 8.5295e-04
Loss = 1.7150e-01, PNorm = 47.5796, GNorm = 1.8903, lr_0 = 8.5236e-04
Loss = 1.3824e-01, PNorm = 47.5938, GNorm = 0.9733, lr_0 = 8.5178e-04
Loss = 1.6784e-01, PNorm = 47.6053, GNorm = 1.2763, lr_0 = 8.5120e-04
Loss = 1.5628e-01, PNorm = 47.6249, GNorm = 2.0092, lr_0 = 8.5061e-04
Loss = 1.8313e-01, PNorm = 47.6389, GNorm = 0.8632, lr_0 = 8.5003e-04
Loss = 1.6371e-01, PNorm = 47.6665, GNorm = 2.8139, lr_0 = 8.4945e-04
Loss = 1.5397e-01, PNorm = 47.6908, GNorm = 1.3470, lr_0 = 8.4887e-04
Loss = 1.4787e-01, PNorm = 47.7062, GNorm = 0.8878, lr_0 = 8.4828e-04
Validation mae = 0.457608
Epoch 4
Loss = 1.4438e-01, PNorm = 47.7262, GNorm = 1.4203, lr_0 = 8.4770e-04
Loss = 1.4596e-01, PNorm = 47.7516, GNorm = 0.8978, lr_0 = 8.4712e-04
Loss = 1.6015e-01, PNorm = 47.7715, GNorm = 0.9551, lr_0 = 8.4654e-04
Loss = 1.6618e-01, PNorm = 47.7872, GNorm = 1.0434, lr_0 = 8.4596e-04
Loss = 1.6210e-01, PNorm = 47.8150, GNorm = 1.3840, lr_0 = 8.4538e-04
Loss = 1.4520e-01, PNorm = 47.8392, GNorm = 0.9432, lr_0 = 8.4480e-04
Loss = 1.4338e-01, PNorm = 47.8605, GNorm = 0.9158, lr_0 = 8.4423e-04
Loss = 1.4246e-01, PNorm = 47.8745, GNorm = 0.8415, lr_0 = 8.4365e-04
Loss = 1.3336e-01, PNorm = 47.8849, GNorm = 0.8922, lr_0 = 8.4307e-04
Loss = 1.2771e-01, PNorm = 47.9019, GNorm = 1.0478, lr_0 = 8.4249e-04
Loss = 1.4912e-01, PNorm = 47.9163, GNorm = 2.5318, lr_0 = 8.4191e-04
Loss = 1.5347e-01, PNorm = 47.9331, GNorm = 1.1799, lr_0 = 8.4134e-04
Loss = 1.6441e-01, PNorm = 47.9509, GNorm = 1.6375, lr_0 = 8.4076e-04
Loss = 1.4835e-01, PNorm = 47.9680, GNorm = 2.4269, lr_0 = 8.4019e-04
Loss = 1.5882e-01, PNorm = 47.9882, GNorm = 2.5657, lr_0 = 8.3961e-04
Loss = 1.7182e-01, PNorm = 48.0213, GNorm = 3.3025, lr_0 = 8.3903e-04
Loss = 1.9446e-01, PNorm = 48.0523, GNorm = 2.2395, lr_0 = 8.3846e-04
Loss = 1.7997e-01, PNorm = 48.0760, GNorm = 1.6521, lr_0 = 8.3789e-04
Loss = 2.1563e-01, PNorm = 48.1126, GNorm = 2.0462, lr_0 = 8.3731e-04
Loss = 1.7918e-01, PNorm = 48.1477, GNorm = 2.0569, lr_0 = 8.3674e-04
Loss = 1.4607e-01, PNorm = 48.1750, GNorm = 0.8028, lr_0 = 8.3616e-04
Loss = 1.3731e-01, PNorm = 48.1990, GNorm = 1.5084, lr_0 = 8.3559e-04
Loss = 1.4904e-01, PNorm = 48.2179, GNorm = 0.8885, lr_0 = 8.3502e-04
Loss = 1.6863e-01, PNorm = 48.2301, GNorm = 2.3317, lr_0 = 8.3445e-04
Loss = 1.4538e-01, PNorm = 48.2504, GNorm = 1.2395, lr_0 = 8.3388e-04
Loss = 1.4500e-01, PNorm = 48.2760, GNorm = 0.9751, lr_0 = 8.3330e-04
Loss = 1.3774e-01, PNorm = 48.2910, GNorm = 1.3096, lr_0 = 8.3273e-04
Loss = 1.7519e-01, PNorm = 48.3233, GNorm = 1.4565, lr_0 = 8.3216e-04
Loss = 1.7715e-01, PNorm = 48.3509, GNorm = 1.3533, lr_0 = 8.3159e-04
Loss = 1.5912e-01, PNorm = 48.3738, GNorm = 1.5939, lr_0 = 8.3102e-04
Loss = 1.1976e-01, PNorm = 48.3960, GNorm = 1.0470, lr_0 = 8.3045e-04
Loss = 1.6101e-01, PNorm = 48.4236, GNorm = 1.3440, lr_0 = 8.2988e-04
Loss = 1.3864e-01, PNorm = 48.4473, GNorm = 1.5444, lr_0 = 8.2932e-04
Loss = 1.2973e-01, PNorm = 48.4685, GNorm = 1.1631, lr_0 = 8.2875e-04
Loss = 1.5957e-01, PNorm = 48.4900, GNorm = 0.8248, lr_0 = 8.2818e-04
Loss = 1.6799e-01, PNorm = 48.5095, GNorm = 0.8062, lr_0 = 8.2761e-04
Loss = 1.5066e-01, PNorm = 48.5228, GNorm = 1.5628, lr_0 = 8.2705e-04
Loss = 1.6180e-01, PNorm = 48.5415, GNorm = 1.1280, lr_0 = 8.2648e-04
Loss = 1.4811e-01, PNorm = 48.5581, GNorm = 2.3381, lr_0 = 8.2591e-04
Loss = 1.3792e-01, PNorm = 48.5778, GNorm = 0.7893, lr_0 = 8.2535e-04
Loss = 1.6211e-01, PNorm = 48.5965, GNorm = 0.6354, lr_0 = 8.2478e-04
Loss = 1.4164e-01, PNorm = 48.6172, GNorm = 0.5956, lr_0 = 8.2422e-04
Loss = 1.5543e-01, PNorm = 48.6397, GNorm = 1.1285, lr_0 = 8.2365e-04
Loss = 1.5759e-01, PNorm = 48.6552, GNorm = 1.8177, lr_0 = 8.2309e-04
Loss = 1.2966e-01, PNorm = 48.6700, GNorm = 1.8057, lr_0 = 8.2252e-04
Loss = 1.6580e-01, PNorm = 48.6963, GNorm = 1.3412, lr_0 = 8.2196e-04
Loss = 1.8599e-01, PNorm = 48.7174, GNorm = 1.2106, lr_0 = 8.2140e-04
Loss = 1.4610e-01, PNorm = 48.7418, GNorm = 1.0449, lr_0 = 8.2084e-04
Loss = 1.4680e-01, PNorm = 48.7639, GNorm = 1.4545, lr_0 = 8.2027e-04
Loss = 1.4134e-01, PNorm = 48.7768, GNorm = 0.5396, lr_0 = 8.1971e-04
Loss = 1.4319e-01, PNorm = 48.7927, GNorm = 1.4709, lr_0 = 8.1915e-04
Loss = 1.5722e-01, PNorm = 48.8136, GNorm = 0.8389, lr_0 = 8.1859e-04
Loss = 1.5178e-01, PNorm = 48.8330, GNorm = 1.4450, lr_0 = 8.1803e-04
Loss = 1.3727e-01, PNorm = 48.8602, GNorm = 0.9318, lr_0 = 8.1747e-04
Loss = 1.4123e-01, PNorm = 48.8795, GNorm = 1.9306, lr_0 = 8.1691e-04
Loss = 1.5006e-01, PNorm = 48.9001, GNorm = 1.3076, lr_0 = 8.1635e-04
Loss = 1.4946e-01, PNorm = 48.9267, GNorm = 2.4074, lr_0 = 8.1579e-04
Loss = 1.7990e-01, PNorm = 48.9562, GNorm = 1.6546, lr_0 = 8.1523e-04
Loss = 1.4284e-01, PNorm = 48.9794, GNorm = 1.0940, lr_0 = 8.1467e-04
Loss = 1.4424e-01, PNorm = 48.9972, GNorm = 1.2715, lr_0 = 8.1411e-04
Loss = 1.4064e-01, PNorm = 49.0141, GNorm = 0.8361, lr_0 = 8.1355e-04
Loss = 1.4907e-01, PNorm = 49.0303, GNorm = 0.6532, lr_0 = 8.1300e-04
Loss = 1.4926e-01, PNorm = 49.0555, GNorm = 1.1856, lr_0 = 8.1244e-04
Loss = 1.8758e-01, PNorm = 49.0760, GNorm = 2.3896, lr_0 = 8.1188e-04
Loss = 1.7744e-01, PNorm = 49.0976, GNorm = 2.5558, lr_0 = 8.1133e-04
Loss = 1.5941e-01, PNorm = 49.1170, GNorm = 1.4059, lr_0 = 8.1077e-04
Loss = 1.4769e-01, PNorm = 49.1412, GNorm = 0.4418, lr_0 = 8.1022e-04
Loss = 1.3753e-01, PNorm = 49.1580, GNorm = 0.9204, lr_0 = 8.0966e-04
Loss = 1.4520e-01, PNorm = 49.1759, GNorm = 0.8932, lr_0 = 8.0911e-04
Loss = 1.6783e-01, PNorm = 49.1976, GNorm = 1.1927, lr_0 = 8.0855e-04
Loss = 1.3254e-01, PNorm = 49.2171, GNorm = 1.1514, lr_0 = 8.0800e-04
Loss = 1.5734e-01, PNorm = 49.2366, GNorm = 0.7196, lr_0 = 8.0745e-04
Loss = 1.3084e-01, PNorm = 49.2579, GNorm = 0.8462, lr_0 = 8.0689e-04
Loss = 1.4970e-01, PNorm = 49.2735, GNorm = 1.8304, lr_0 = 8.0634e-04
Loss = 1.3286e-01, PNorm = 49.2892, GNorm = 0.9385, lr_0 = 8.0579e-04
Loss = 1.4075e-01, PNorm = 49.3030, GNorm = 2.1157, lr_0 = 8.0523e-04
Loss = 1.4331e-01, PNorm = 49.3177, GNorm = 0.8337, lr_0 = 8.0468e-04
Loss = 1.4113e-01, PNorm = 49.3418, GNorm = 1.6314, lr_0 = 8.0413e-04
Loss = 1.3947e-01, PNorm = 49.3599, GNorm = 1.5848, lr_0 = 8.0358e-04
Loss = 1.3427e-01, PNorm = 49.3845, GNorm = 2.4495, lr_0 = 8.0303e-04
Loss = 1.5283e-01, PNorm = 49.4104, GNorm = 0.9637, lr_0 = 8.0248e-04
Loss = 1.1986e-01, PNorm = 49.4273, GNorm = 0.6717, lr_0 = 8.0193e-04
Loss = 1.3870e-01, PNorm = 49.4462, GNorm = 0.5777, lr_0 = 8.0138e-04
Loss = 1.7130e-01, PNorm = 49.4660, GNorm = 0.9963, lr_0 = 8.0083e-04
Loss = 1.2688e-01, PNorm = 49.4811, GNorm = 1.6125, lr_0 = 8.0028e-04
Loss = 1.3937e-01, PNorm = 49.4923, GNorm = 0.9404, lr_0 = 7.9974e-04
Loss = 1.7716e-01, PNorm = 49.5125, GNorm = 1.4642, lr_0 = 7.9919e-04
Loss = 1.8291e-01, PNorm = 49.5435, GNorm = 3.3129, lr_0 = 7.9864e-04
Loss = 1.7845e-01, PNorm = 49.5752, GNorm = 1.1474, lr_0 = 7.9809e-04
Loss = 1.6585e-01, PNorm = 49.6044, GNorm = 2.1289, lr_0 = 7.9755e-04
Loss = 1.4480e-01, PNorm = 49.6290, GNorm = 0.6343, lr_0 = 7.9700e-04
Loss = 1.6540e-01, PNorm = 49.6525, GNorm = 0.8317, lr_0 = 7.9645e-04
Loss = 1.4888e-01, PNorm = 49.6667, GNorm = 1.4855, lr_0 = 7.9591e-04
Loss = 1.4884e-01, PNorm = 49.6835, GNorm = 1.0622, lr_0 = 7.9536e-04
Loss = 1.6448e-01, PNorm = 49.7000, GNorm = 0.6245, lr_0 = 7.9482e-04
Loss = 1.6302e-01, PNorm = 49.7196, GNorm = 1.4965, lr_0 = 7.9427e-04
Loss = 1.5742e-01, PNorm = 49.7418, GNorm = 1.4836, lr_0 = 7.9373e-04
Loss = 1.5892e-01, PNorm = 49.7629, GNorm = 1.7617, lr_0 = 7.9319e-04
Loss = 1.4463e-01, PNorm = 49.7769, GNorm = 0.9559, lr_0 = 7.9264e-04
Loss = 1.4271e-01, PNorm = 49.7934, GNorm = 0.8665, lr_0 = 7.9210e-04
Loss = 1.4715e-01, PNorm = 49.8077, GNorm = 1.7908, lr_0 = 7.9156e-04
Loss = 1.6205e-01, PNorm = 49.8237, GNorm = 0.7614, lr_0 = 7.9101e-04
Loss = 1.8279e-01, PNorm = 49.8500, GNorm = 0.9880, lr_0 = 7.9047e-04
Loss = 1.5760e-01, PNorm = 49.8761, GNorm = 1.2203, lr_0 = 7.8993e-04
Loss = 1.3996e-01, PNorm = 49.9009, GNorm = 1.1361, lr_0 = 7.8939e-04
Loss = 1.5526e-01, PNorm = 49.9171, GNorm = 1.8520, lr_0 = 7.8885e-04
Loss = 1.6607e-01, PNorm = 49.9298, GNorm = 0.6016, lr_0 = 7.8831e-04
Loss = 1.7591e-01, PNorm = 49.9500, GNorm = 1.9168, lr_0 = 7.8777e-04
Loss = 1.6938e-01, PNorm = 49.9816, GNorm = 1.4714, lr_0 = 7.8723e-04
Loss = 1.6970e-01, PNorm = 50.0121, GNorm = 0.7567, lr_0 = 7.8669e-04
Loss = 1.3342e-01, PNorm = 50.0408, GNorm = 1.1890, lr_0 = 7.8615e-04
Loss = 1.7153e-01, PNorm = 50.0681, GNorm = 1.1155, lr_0 = 7.8561e-04
Loss = 1.5048e-01, PNorm = 50.0829, GNorm = 0.6494, lr_0 = 7.8507e-04
Loss = 1.5893e-01, PNorm = 50.1032, GNorm = 0.7812, lr_0 = 7.8454e-04
Loss = 1.5340e-01, PNorm = 50.1292, GNorm = 1.3571, lr_0 = 7.8400e-04
Loss = 1.5413e-01, PNorm = 50.1438, GNorm = 1.0202, lr_0 = 7.8346e-04
Loss = 1.4773e-01, PNorm = 50.1636, GNorm = 1.9938, lr_0 = 7.8293e-04
Loss = 1.3385e-01, PNorm = 50.1931, GNorm = 0.5829, lr_0 = 7.8239e-04
Loss = 1.4609e-01, PNorm = 50.2137, GNorm = 0.7253, lr_0 = 7.8185e-04
Loss = 1.5228e-01, PNorm = 50.2372, GNorm = 0.7482, lr_0 = 7.8132e-04
Validation mae = 0.435988
Epoch 5
Loss = 1.3981e-01, PNorm = 50.2512, GNorm = 0.8634, lr_0 = 7.8078e-04
Loss = 1.1536e-01, PNorm = 50.2627, GNorm = 0.6571, lr_0 = 7.8025e-04
Loss = 1.4034e-01, PNorm = 50.2750, GNorm = 1.7290, lr_0 = 7.7971e-04
Loss = 1.2021e-01, PNorm = 50.2931, GNorm = 1.2845, lr_0 = 7.7918e-04
Loss = 1.4992e-01, PNorm = 50.3158, GNorm = 1.1525, lr_0 = 7.7864e-04
Loss = 1.3171e-01, PNorm = 50.3322, GNorm = 0.7590, lr_0 = 7.7811e-04
Loss = 1.4435e-01, PNorm = 50.3457, GNorm = 1.3762, lr_0 = 7.7758e-04
Loss = 1.5074e-01, PNorm = 50.3625, GNorm = 1.2549, lr_0 = 7.7705e-04
Loss = 1.1917e-01, PNorm = 50.3825, GNorm = 0.7158, lr_0 = 7.7651e-04
Loss = 1.1692e-01, PNorm = 50.3997, GNorm = 0.7342, lr_0 = 7.7598e-04
Loss = 1.2895e-01, PNorm = 50.4200, GNorm = 2.3456, lr_0 = 7.7545e-04
Loss = 1.3351e-01, PNorm = 50.4442, GNorm = 0.8090, lr_0 = 7.7492e-04
Loss = 1.4487e-01, PNorm = 50.4644, GNorm = 0.8804, lr_0 = 7.7439e-04
Loss = 1.4116e-01, PNorm = 50.4792, GNorm = 0.5458, lr_0 = 7.7386e-04
Loss = 1.5875e-01, PNorm = 50.5000, GNorm = 0.8085, lr_0 = 7.7333e-04
Loss = 1.5264e-01, PNorm = 50.5290, GNorm = 1.0407, lr_0 = 7.7280e-04
Loss = 1.1981e-01, PNorm = 50.5520, GNorm = 0.6554, lr_0 = 7.7227e-04
Loss = 1.6875e-01, PNorm = 50.5689, GNorm = 1.9444, lr_0 = 7.7174e-04
Loss = 1.6575e-01, PNorm = 50.5958, GNorm = 0.7245, lr_0 = 7.7121e-04
Loss = 1.5282e-01, PNorm = 50.6166, GNorm = 1.2230, lr_0 = 7.7068e-04
Loss = 1.4933e-01, PNorm = 50.6397, GNorm = 0.5469, lr_0 = 7.7015e-04
Loss = 1.3999e-01, PNorm = 50.6567, GNorm = 0.6029, lr_0 = 7.6963e-04
Loss = 1.2332e-01, PNorm = 50.6759, GNorm = 1.6291, lr_0 = 7.6910e-04
Loss = 1.6853e-01, PNorm = 50.6908, GNorm = 1.2729, lr_0 = 7.6857e-04
Loss = 1.6056e-01, PNorm = 50.7072, GNorm = 0.6390, lr_0 = 7.6805e-04
Loss = 1.6202e-01, PNorm = 50.7351, GNorm = 1.0376, lr_0 = 7.6752e-04
Loss = 1.4480e-01, PNorm = 50.7587, GNorm = 0.4943, lr_0 = 7.6699e-04
Loss = 1.3402e-01, PNorm = 50.7808, GNorm = 0.8151, lr_0 = 7.6647e-04
Loss = 1.5959e-01, PNorm = 50.8102, GNorm = 1.1169, lr_0 = 7.6594e-04
Loss = 1.2962e-01, PNorm = 50.8345, GNorm = 0.5880, lr_0 = 7.6542e-04
Loss = 1.2890e-01, PNorm = 50.8480, GNorm = 1.5080, lr_0 = 7.6489e-04
Loss = 1.3002e-01, PNorm = 50.8603, GNorm = 0.7138, lr_0 = 7.6437e-04
Loss = 1.2860e-01, PNorm = 50.8770, GNorm = 0.8857, lr_0 = 7.6385e-04
Loss = 1.4545e-01, PNorm = 50.8972, GNorm = 1.6106, lr_0 = 7.6332e-04
Loss = 1.6604e-01, PNorm = 50.9187, GNorm = 1.4217, lr_0 = 7.6280e-04
Loss = 1.3355e-01, PNorm = 50.9500, GNorm = 1.8152, lr_0 = 7.6228e-04
Loss = 1.4239e-01, PNorm = 50.9696, GNorm = 0.7287, lr_0 = 7.6176e-04
Loss = 1.3885e-01, PNorm = 50.9881, GNorm = 1.1335, lr_0 = 7.6123e-04
Loss = 1.4307e-01, PNorm = 51.0078, GNorm = 1.0126, lr_0 = 7.6071e-04
Loss = 1.2891e-01, PNorm = 51.0327, GNorm = 0.6324, lr_0 = 7.6019e-04
Loss = 1.4057e-01, PNorm = 51.0538, GNorm = 0.9159, lr_0 = 7.5967e-04
Loss = 1.4187e-01, PNorm = 51.0775, GNorm = 0.5394, lr_0 = 7.5915e-04
Loss = 1.3241e-01, PNorm = 51.0905, GNorm = 0.8421, lr_0 = 7.5863e-04
Loss = 1.3766e-01, PNorm = 51.0996, GNorm = 0.5145, lr_0 = 7.5811e-04
Loss = 1.2482e-01, PNorm = 51.1215, GNorm = 0.6061, lr_0 = 7.5759e-04
Loss = 1.4584e-01, PNorm = 51.1420, GNorm = 1.3811, lr_0 = 7.5707e-04
Loss = 1.3149e-01, PNorm = 51.1596, GNorm = 1.9608, lr_0 = 7.5655e-04
Loss = 1.4073e-01, PNorm = 51.1727, GNorm = 0.7464, lr_0 = 7.5603e-04
Loss = 1.4066e-01, PNorm = 51.1909, GNorm = 0.8143, lr_0 = 7.5552e-04
Loss = 1.3626e-01, PNorm = 51.2075, GNorm = 0.8083, lr_0 = 7.5500e-04
Loss = 1.2513e-01, PNorm = 51.2215, GNorm = 0.9022, lr_0 = 7.5448e-04
Loss = 1.5495e-01, PNorm = 51.2285, GNorm = 1.7203, lr_0 = 7.5397e-04
Loss = 1.4131e-01, PNorm = 51.2435, GNorm = 1.1252, lr_0 = 7.5345e-04
Loss = 1.3642e-01, PNorm = 51.2644, GNorm = 1.4480, lr_0 = 7.5293e-04
Loss = 1.4287e-01, PNorm = 51.2828, GNorm = 1.8381, lr_0 = 7.5242e-04
Loss = 1.4507e-01, PNorm = 51.3049, GNorm = 0.5629, lr_0 = 7.5190e-04
Loss = 1.1992e-01, PNorm = 51.3277, GNorm = 1.1224, lr_0 = 7.5139e-04
Loss = 1.3043e-01, PNorm = 51.3440, GNorm = 0.5775, lr_0 = 7.5087e-04
Loss = 1.3978e-01, PNorm = 51.3621, GNorm = 0.8483, lr_0 = 7.5036e-04
Loss = 1.4750e-01, PNorm = 51.3799, GNorm = 1.0058, lr_0 = 7.4984e-04
Loss = 1.3626e-01, PNorm = 51.3963, GNorm = 0.7243, lr_0 = 7.4933e-04
Loss = 1.3709e-01, PNorm = 51.4166, GNorm = 1.8174, lr_0 = 7.4882e-04
Loss = 1.1169e-01, PNorm = 51.4352, GNorm = 1.1646, lr_0 = 7.4830e-04
Loss = 1.4166e-01, PNorm = 51.4541, GNorm = 0.6137, lr_0 = 7.4779e-04
Loss = 1.4151e-01, PNorm = 51.4694, GNorm = 0.7878, lr_0 = 7.4728e-04
Loss = 1.2435e-01, PNorm = 51.4852, GNorm = 1.1782, lr_0 = 7.4677e-04
Loss = 1.3686e-01, PNorm = 51.5004, GNorm = 0.9515, lr_0 = 7.4625e-04
Loss = 1.4395e-01, PNorm = 51.5216, GNorm = 0.6235, lr_0 = 7.4574e-04
Loss = 1.2329e-01, PNorm = 51.5390, GNorm = 2.1154, lr_0 = 7.4523e-04
Loss = 1.5737e-01, PNorm = 51.5518, GNorm = 0.7832, lr_0 = 7.4472e-04
Loss = 1.4600e-01, PNorm = 51.5695, GNorm = 0.9795, lr_0 = 7.4421e-04
Loss = 1.7001e-01, PNorm = 51.5957, GNorm = 1.3968, lr_0 = 7.4370e-04
Loss = 1.4685e-01, PNorm = 51.6230, GNorm = 0.8879, lr_0 = 7.4319e-04
Loss = 1.3057e-01, PNorm = 51.6503, GNorm = 1.5438, lr_0 = 7.4268e-04
Loss = 1.3164e-01, PNorm = 51.6715, GNorm = 1.7519, lr_0 = 7.4217e-04
Loss = 1.4229e-01, PNorm = 51.6864, GNorm = 1.2843, lr_0 = 7.4167e-04
Loss = 1.3642e-01, PNorm = 51.7069, GNorm = 1.2688, lr_0 = 7.4116e-04
Loss = 1.3933e-01, PNorm = 51.7210, GNorm = 1.1814, lr_0 = 7.4065e-04
Loss = 1.3080e-01, PNorm = 51.7386, GNorm = 1.1666, lr_0 = 7.4014e-04
Loss = 1.4133e-01, PNorm = 51.7555, GNorm = 0.8067, lr_0 = 7.3964e-04
Loss = 1.5047e-01, PNorm = 51.7771, GNorm = 2.1159, lr_0 = 7.3913e-04
Loss = 1.2630e-01, PNorm = 51.7980, GNorm = 1.4916, lr_0 = 7.3862e-04
Loss = 1.3458e-01, PNorm = 51.8087, GNorm = 1.1227, lr_0 = 7.3812e-04
Loss = 1.2210e-01, PNorm = 51.8241, GNorm = 1.1176, lr_0 = 7.3761e-04
Loss = 1.3857e-01, PNorm = 51.8360, GNorm = 1.5090, lr_0 = 7.3711e-04
Loss = 1.4647e-01, PNorm = 51.8542, GNorm = 1.8195, lr_0 = 7.3660e-04
Loss = 1.4200e-01, PNorm = 51.8799, GNorm = 0.8404, lr_0 = 7.3610e-04
Loss = 1.4897e-01, PNorm = 51.9046, GNorm = 1.2793, lr_0 = 7.3559e-04
Loss = 1.3609e-01, PNorm = 51.9242, GNorm = 0.7067, lr_0 = 7.3509e-04
Loss = 1.2402e-01, PNorm = 51.9415, GNorm = 0.6553, lr_0 = 7.3458e-04
Loss = 1.2282e-01, PNorm = 51.9599, GNorm = 1.2627, lr_0 = 7.3408e-04
Loss = 1.3702e-01, PNorm = 51.9740, GNorm = 1.4874, lr_0 = 7.3358e-04
Loss = 1.5482e-01, PNorm = 51.9904, GNorm = 0.7903, lr_0 = 7.3308e-04
Loss = 1.6355e-01, PNorm = 52.0120, GNorm = 2.0853, lr_0 = 7.3257e-04
Loss = 1.5735e-01, PNorm = 52.0298, GNorm = 2.4322, lr_0 = 7.3207e-04
Loss = 1.5623e-01, PNorm = 52.0484, GNorm = 1.1305, lr_0 = 7.3157e-04
Loss = 1.4528e-01, PNorm = 52.0791, GNorm = 0.7587, lr_0 = 7.3107e-04
Loss = 1.5052e-01, PNorm = 52.0963, GNorm = 1.7365, lr_0 = 7.3057e-04
Loss = 1.4602e-01, PNorm = 52.1122, GNorm = 1.3928, lr_0 = 7.3007e-04
Loss = 1.2890e-01, PNorm = 52.1275, GNorm = 0.8321, lr_0 = 7.2957e-04
Loss = 1.4224e-01, PNorm = 52.1440, GNorm = 0.6583, lr_0 = 7.2907e-04
Loss = 1.3668e-01, PNorm = 52.1580, GNorm = 1.4929, lr_0 = 7.2857e-04
Loss = 1.3040e-01, PNorm = 52.1688, GNorm = 0.7463, lr_0 = 7.2807e-04
Loss = 1.4199e-01, PNorm = 52.1815, GNorm = 0.8589, lr_0 = 7.2757e-04
Loss = 1.2987e-01, PNorm = 52.1930, GNorm = 0.9437, lr_0 = 7.2707e-04
Loss = 1.3143e-01, PNorm = 52.2072, GNorm = 0.5927, lr_0 = 7.2657e-04
Loss = 1.2861e-01, PNorm = 52.2222, GNorm = 1.5843, lr_0 = 7.2608e-04
Loss = 1.2446e-01, PNorm = 52.2425, GNorm = 1.0927, lr_0 = 7.2558e-04
Loss = 1.2941e-01, PNorm = 52.2603, GNorm = 0.6074, lr_0 = 7.2508e-04
Loss = 1.2760e-01, PNorm = 52.2735, GNorm = 1.5192, lr_0 = 7.2458e-04
Loss = 1.5564e-01, PNorm = 52.2936, GNorm = 1.7989, lr_0 = 7.2409e-04
Loss = 1.2055e-01, PNorm = 52.3195, GNorm = 1.6115, lr_0 = 7.2359e-04
Loss = 1.3050e-01, PNorm = 52.3305, GNorm = 0.7318, lr_0 = 7.2310e-04
Loss = 1.2968e-01, PNorm = 52.3413, GNorm = 1.2099, lr_0 = 7.2260e-04
Loss = 1.3706e-01, PNorm = 52.3633, GNorm = 0.5924, lr_0 = 7.2211e-04
Loss = 1.3344e-01, PNorm = 52.3929, GNorm = 2.1775, lr_0 = 7.2161e-04
Loss = 1.7010e-01, PNorm = 52.4094, GNorm = 1.4307, lr_0 = 7.2112e-04
Loss = 1.5401e-01, PNorm = 52.4353, GNorm = 0.4959, lr_0 = 7.2062e-04
Loss = 1.4325e-01, PNorm = 52.4607, GNorm = 1.4888, lr_0 = 7.2013e-04
Loss = 1.1488e-01, PNorm = 52.4835, GNorm = 0.6775, lr_0 = 7.1964e-04
Validation mae = 0.425433
Epoch 6
Loss = 1.2714e-01, PNorm = 52.5127, GNorm = 0.4908, lr_0 = 7.1914e-04
Loss = 1.2658e-01, PNorm = 52.5361, GNorm = 1.1510, lr_0 = 7.1865e-04
Loss = 1.0774e-01, PNorm = 52.5561, GNorm = 0.8107, lr_0 = 7.1816e-04
Loss = 1.1735e-01, PNorm = 52.5860, GNorm = 0.9832, lr_0 = 7.1767e-04
Loss = 1.2902e-01, PNorm = 52.6040, GNorm = 0.6608, lr_0 = 7.1717e-04
Loss = 1.3838e-01, PNorm = 52.6224, GNorm = 1.6160, lr_0 = 7.1668e-04
Loss = 1.1358e-01, PNorm = 52.6424, GNorm = 0.8879, lr_0 = 7.1619e-04
Loss = 1.2888e-01, PNorm = 52.6650, GNorm = 1.2148, lr_0 = 7.1570e-04
Loss = 1.3523e-01, PNorm = 52.6902, GNorm = 0.9997, lr_0 = 7.1521e-04
Loss = 1.3092e-01, PNorm = 52.7113, GNorm = 0.8186, lr_0 = 7.1472e-04
Loss = 1.2039e-01, PNorm = 52.7302, GNorm = 0.5626, lr_0 = 7.1423e-04
Loss = 1.0742e-01, PNorm = 52.7429, GNorm = 0.7822, lr_0 = 7.1374e-04
Loss = 1.3892e-01, PNorm = 52.7488, GNorm = 0.7259, lr_0 = 7.1325e-04
Loss = 1.3618e-01, PNorm = 52.7713, GNorm = 0.9413, lr_0 = 7.1277e-04
Loss = 1.4901e-01, PNorm = 52.7881, GNorm = 1.0740, lr_0 = 7.1228e-04
Loss = 1.3087e-01, PNorm = 52.8149, GNorm = 0.7284, lr_0 = 7.1179e-04
Loss = 1.4341e-01, PNorm = 52.8417, GNorm = 2.0007, lr_0 = 7.1130e-04
Loss = 1.3058e-01, PNorm = 52.8650, GNorm = 1.0648, lr_0 = 7.1081e-04
Loss = 1.1738e-01, PNorm = 52.8891, GNorm = 0.8883, lr_0 = 7.1033e-04
Loss = 1.2457e-01, PNorm = 52.9070, GNorm = 0.8992, lr_0 = 7.0984e-04
Loss = 1.3669e-01, PNorm = 52.9236, GNorm = 1.1228, lr_0 = 7.0935e-04
Loss = 1.2458e-01, PNorm = 52.9332, GNorm = 1.9846, lr_0 = 7.0887e-04
Loss = 1.3188e-01, PNorm = 52.9487, GNorm = 0.8425, lr_0 = 7.0838e-04
Loss = 1.2569e-01, PNorm = 52.9670, GNorm = 0.9780, lr_0 = 7.0790e-04
Loss = 1.2074e-01, PNorm = 52.9847, GNorm = 0.9151, lr_0 = 7.0741e-04
Loss = 1.3103e-01, PNorm = 53.0066, GNorm = 0.8769, lr_0 = 7.0693e-04
Loss = 1.1736e-01, PNorm = 53.0206, GNorm = 1.1244, lr_0 = 7.0644e-04
Loss = 1.2090e-01, PNorm = 53.0359, GNorm = 1.1458, lr_0 = 7.0596e-04
Loss = 1.3776e-01, PNorm = 53.0507, GNorm = 1.0004, lr_0 = 7.0548e-04
Loss = 1.2039e-01, PNorm = 53.0688, GNorm = 1.1926, lr_0 = 7.0499e-04
Loss = 1.3641e-01, PNorm = 53.0875, GNorm = 1.1330, lr_0 = 7.0451e-04
Loss = 1.2134e-01, PNorm = 53.1040, GNorm = 1.4711, lr_0 = 7.0403e-04
Loss = 1.3289e-01, PNorm = 53.1202, GNorm = 0.5568, lr_0 = 7.0354e-04
Loss = 1.2704e-01, PNorm = 53.1397, GNorm = 1.3276, lr_0 = 7.0306e-04
Loss = 1.2450e-01, PNorm = 53.1589, GNorm = 1.4020, lr_0 = 7.0258e-04
Loss = 1.1977e-01, PNorm = 53.1727, GNorm = 0.7856, lr_0 = 7.0210e-04
Loss = 1.4598e-01, PNorm = 53.1857, GNorm = 0.9044, lr_0 = 7.0162e-04
Loss = 1.2324e-01, PNorm = 53.2026, GNorm = 1.4829, lr_0 = 7.0114e-04
Loss = 1.2802e-01, PNorm = 53.2202, GNorm = 1.2491, lr_0 = 7.0066e-04
Loss = 1.3445e-01, PNorm = 53.2413, GNorm = 0.5948, lr_0 = 7.0018e-04
Loss = 1.2361e-01, PNorm = 53.2591, GNorm = 0.7937, lr_0 = 6.9970e-04
Loss = 1.3105e-01, PNorm = 53.2815, GNorm = 0.6767, lr_0 = 6.9922e-04
Loss = 1.3009e-01, PNorm = 53.2968, GNorm = 1.3931, lr_0 = 6.9874e-04
Loss = 1.1057e-01, PNorm = 53.3067, GNorm = 0.8447, lr_0 = 6.9826e-04
Loss = 1.2645e-01, PNorm = 53.3219, GNorm = 0.6777, lr_0 = 6.9778e-04
Loss = 1.2398e-01, PNorm = 53.3396, GNorm = 0.5799, lr_0 = 6.9730e-04
Loss = 1.3557e-01, PNorm = 53.3604, GNorm = 1.5299, lr_0 = 6.9683e-04
Loss = 1.3552e-01, PNorm = 53.3798, GNorm = 0.6524, lr_0 = 6.9635e-04
Loss = 1.1668e-01, PNorm = 53.3975, GNorm = 0.8802, lr_0 = 6.9587e-04
Loss = 1.1361e-01, PNorm = 53.4043, GNorm = 0.9351, lr_0 = 6.9540e-04
Loss = 1.2224e-01, PNorm = 53.4153, GNorm = 1.0154, lr_0 = 6.9492e-04
Loss = 1.0331e-01, PNorm = 53.4320, GNorm = 0.9916, lr_0 = 6.9444e-04
Loss = 1.2147e-01, PNorm = 53.4481, GNorm = 1.4647, lr_0 = 6.9397e-04
Loss = 1.2575e-01, PNorm = 53.4659, GNorm = 1.0552, lr_0 = 6.9349e-04
Loss = 1.2891e-01, PNorm = 53.4875, GNorm = 0.6504, lr_0 = 6.9302e-04
Loss = 1.1215e-01, PNorm = 53.5006, GNorm = 1.4664, lr_0 = 6.9254e-04
Loss = 1.3652e-01, PNorm = 53.5156, GNorm = 0.8485, lr_0 = 6.9207e-04
Loss = 1.1052e-01, PNorm = 53.5357, GNorm = 0.9818, lr_0 = 6.9159e-04
Loss = 1.3030e-01, PNorm = 53.5448, GNorm = 1.3927, lr_0 = 6.9112e-04
Loss = 1.1330e-01, PNorm = 53.5555, GNorm = 1.0037, lr_0 = 6.9065e-04
Loss = 1.3250e-01, PNorm = 53.5690, GNorm = 1.1648, lr_0 = 6.9017e-04
Loss = 1.4939e-01, PNorm = 53.5878, GNorm = 2.2464, lr_0 = 6.8970e-04
Loss = 1.2033e-01, PNorm = 53.6087, GNorm = 0.6645, lr_0 = 6.8923e-04
Loss = 1.2670e-01, PNorm = 53.6320, GNorm = 1.3642, lr_0 = 6.8876e-04
Loss = 1.2728e-01, PNorm = 53.6551, GNorm = 1.3056, lr_0 = 6.8828e-04
Loss = 1.4895e-01, PNorm = 53.6765, GNorm = 1.4406, lr_0 = 6.8781e-04
Loss = 1.2336e-01, PNorm = 53.6979, GNorm = 1.1600, lr_0 = 6.8734e-04
Loss = 1.4275e-01, PNorm = 53.7140, GNorm = 0.7853, lr_0 = 6.8687e-04
Loss = 1.3000e-01, PNorm = 53.7299, GNorm = 0.6019, lr_0 = 6.8640e-04
Loss = 1.4236e-01, PNorm = 53.7493, GNorm = 1.5420, lr_0 = 6.8593e-04
Loss = 1.2544e-01, PNorm = 53.7672, GNorm = 0.5481, lr_0 = 6.8546e-04
Loss = 1.1455e-01, PNorm = 53.7864, GNorm = 0.8875, lr_0 = 6.8499e-04
Loss = 1.2555e-01, PNorm = 53.8066, GNorm = 1.5600, lr_0 = 6.8452e-04
Loss = 1.3036e-01, PNorm = 53.8192, GNorm = 1.5524, lr_0 = 6.8405e-04
Loss = 1.3515e-01, PNorm = 53.8317, GNorm = 1.1225, lr_0 = 6.8358e-04
Loss = 1.1619e-01, PNorm = 53.8512, GNorm = 0.7538, lr_0 = 6.8312e-04
Loss = 1.4144e-01, PNorm = 53.8764, GNorm = 1.1046, lr_0 = 6.8265e-04
Loss = 1.4690e-01, PNorm = 53.9008, GNorm = 1.6563, lr_0 = 6.8218e-04
Loss = 1.5450e-01, PNorm = 53.9199, GNorm = 1.3408, lr_0 = 6.8171e-04
Loss = 1.2999e-01, PNorm = 53.9423, GNorm = 0.6944, lr_0 = 6.8125e-04
Loss = 1.2839e-01, PNorm = 53.9652, GNorm = 0.6383, lr_0 = 6.8078e-04
Loss = 1.4156e-01, PNorm = 53.9842, GNorm = 0.7698, lr_0 = 6.8031e-04
Loss = 1.2953e-01, PNorm = 54.0063, GNorm = 1.0942, lr_0 = 6.7985e-04
Loss = 1.2390e-01, PNorm = 54.0253, GNorm = 1.1568, lr_0 = 6.7938e-04
Loss = 1.4265e-01, PNorm = 54.0468, GNorm = 1.5374, lr_0 = 6.7892e-04
Loss = 1.5297e-01, PNorm = 54.0680, GNorm = 0.7047, lr_0 = 6.7845e-04
Loss = 1.4039e-01, PNorm = 54.0918, GNorm = 0.6806, lr_0 = 6.7799e-04
Loss = 1.1043e-01, PNorm = 54.1083, GNorm = 0.4162, lr_0 = 6.7752e-04
Loss = 1.2140e-01, PNorm = 54.1246, GNorm = 0.8550, lr_0 = 6.7706e-04
Loss = 1.2748e-01, PNorm = 54.1471, GNorm = 0.8009, lr_0 = 6.7659e-04
Loss = 1.2418e-01, PNorm = 54.1654, GNorm = 1.1436, lr_0 = 6.7613e-04
Loss = 1.2403e-01, PNorm = 54.1766, GNorm = 1.1345, lr_0 = 6.7567e-04
Loss = 1.5091e-01, PNorm = 54.1885, GNorm = 0.7164, lr_0 = 6.7520e-04
Loss = 1.2450e-01, PNorm = 54.2101, GNorm = 0.7778, lr_0 = 6.7474e-04
Loss = 1.1799e-01, PNorm = 54.2307, GNorm = 0.5567, lr_0 = 6.7428e-04
Loss = 1.3784e-01, PNorm = 54.2458, GNorm = 1.2574, lr_0 = 6.7382e-04
Loss = 1.2667e-01, PNorm = 54.2594, GNorm = 1.8337, lr_0 = 6.7335e-04
Loss = 1.1845e-01, PNorm = 54.2769, GNorm = 1.4499, lr_0 = 6.7289e-04
Loss = 1.4006e-01, PNorm = 54.2952, GNorm = 1.1928, lr_0 = 6.7243e-04
Loss = 1.2179e-01, PNorm = 54.3127, GNorm = 1.9876, lr_0 = 6.7197e-04
Loss = 1.1782e-01, PNorm = 54.3235, GNorm = 0.5541, lr_0 = 6.7151e-04
Loss = 1.3117e-01, PNorm = 54.3395, GNorm = 0.8064, lr_0 = 6.7105e-04
Loss = 1.1857e-01, PNorm = 54.3534, GNorm = 0.6024, lr_0 = 6.7059e-04
Loss = 1.4343e-01, PNorm = 54.3710, GNorm = 0.6472, lr_0 = 6.7013e-04
Loss = 1.2919e-01, PNorm = 54.3905, GNorm = 0.6787, lr_0 = 6.6967e-04
Loss = 1.1420e-01, PNorm = 54.4052, GNorm = 0.7264, lr_0 = 6.6921e-04
Loss = 1.5144e-01, PNorm = 54.4268, GNorm = 0.8515, lr_0 = 6.6876e-04
Loss = 1.4771e-01, PNorm = 54.4413, GNorm = 0.9398, lr_0 = 6.6830e-04
Loss = 1.2251e-01, PNorm = 54.4614, GNorm = 1.4394, lr_0 = 6.6784e-04
Loss = 1.3660e-01, PNorm = 54.4791, GNorm = 1.0645, lr_0 = 6.6738e-04
Loss = 1.3233e-01, PNorm = 54.4935, GNorm = 1.1083, lr_0 = 6.6693e-04
Loss = 1.2934e-01, PNorm = 54.5060, GNorm = 0.7405, lr_0 = 6.6647e-04
Loss = 1.1143e-01, PNorm = 54.5230, GNorm = 0.8526, lr_0 = 6.6601e-04
Loss = 1.0661e-01, PNorm = 54.5373, GNorm = 0.6193, lr_0 = 6.6556e-04
Loss = 1.3616e-01, PNorm = 54.5530, GNorm = 0.9401, lr_0 = 6.6510e-04
Loss = 1.2019e-01, PNorm = 54.5621, GNorm = 0.8600, lr_0 = 6.6464e-04
Loss = 1.3436e-01, PNorm = 54.5802, GNorm = 1.2064, lr_0 = 6.6419e-04
Loss = 1.3818e-01, PNorm = 54.6057, GNorm = 0.7301, lr_0 = 6.6373e-04
Loss = 1.2785e-01, PNorm = 54.6215, GNorm = 0.6154, lr_0 = 6.6328e-04
Loss = 1.1001e-01, PNorm = 54.6323, GNorm = 0.7355, lr_0 = 6.6282e-04
Validation mae = 0.424061
Epoch 7
Loss = 1.1209e-01, PNorm = 54.6511, GNorm = 0.8418, lr_0 = 6.6237e-04
Loss = 1.0270e-01, PNorm = 54.6731, GNorm = 0.5409, lr_0 = 6.6192e-04
Loss = 1.1193e-01, PNorm = 54.6893, GNorm = 0.5007, lr_0 = 6.6146e-04
Loss = 1.2406e-01, PNorm = 54.7080, GNorm = 1.3347, lr_0 = 6.6101e-04
Loss = 1.1185e-01, PNorm = 54.7189, GNorm = 0.6631, lr_0 = 6.6056e-04
Loss = 1.3586e-01, PNorm = 54.7291, GNorm = 2.1472, lr_0 = 6.6011e-04
Loss = 1.1785e-01, PNorm = 54.7592, GNorm = 0.6578, lr_0 = 6.5965e-04
Loss = 1.0307e-01, PNorm = 54.7827, GNorm = 1.2472, lr_0 = 6.5920e-04
Loss = 1.2607e-01, PNorm = 54.7986, GNorm = 0.7894, lr_0 = 6.5875e-04
Loss = 9.4577e-02, PNorm = 54.8149, GNorm = 0.6648, lr_0 = 6.5830e-04
Loss = 1.2357e-01, PNorm = 54.8337, GNorm = 1.5722, lr_0 = 6.5785e-04
Loss = 1.3057e-01, PNorm = 54.8546, GNorm = 0.8693, lr_0 = 6.5740e-04
Loss = 1.3395e-01, PNorm = 54.8747, GNorm = 0.9088, lr_0 = 6.5695e-04
Loss = 1.1285e-01, PNorm = 54.8980, GNorm = 0.5311, lr_0 = 6.5650e-04
Loss = 1.1473e-01, PNorm = 54.9180, GNorm = 0.7747, lr_0 = 6.5605e-04
Loss = 1.2049e-01, PNorm = 54.9319, GNorm = 0.6333, lr_0 = 6.5560e-04
Loss = 1.2940e-01, PNorm = 54.9442, GNorm = 1.7765, lr_0 = 6.5515e-04
Loss = 1.0716e-01, PNorm = 54.9634, GNorm = 1.2019, lr_0 = 6.5470e-04
Loss = 1.1948e-01, PNorm = 54.9804, GNorm = 1.0291, lr_0 = 6.5425e-04
Loss = 1.2201e-01, PNorm = 54.9957, GNorm = 0.8523, lr_0 = 6.5380e-04
Loss = 1.2082e-01, PNorm = 55.0162, GNorm = 1.6373, lr_0 = 6.5335e-04
Loss = 1.0240e-01, PNorm = 55.0365, GNorm = 0.9590, lr_0 = 6.5291e-04
Loss = 1.1181e-01, PNorm = 55.0510, GNorm = 0.7767, lr_0 = 6.5246e-04
Loss = 1.0800e-01, PNorm = 55.0559, GNorm = 0.6101, lr_0 = 6.5201e-04
Loss = 1.0202e-01, PNorm = 55.0687, GNorm = 0.7472, lr_0 = 6.5157e-04
Loss = 1.1496e-01, PNorm = 55.0873, GNorm = 0.5038, lr_0 = 6.5112e-04
Loss = 1.1630e-01, PNorm = 55.1012, GNorm = 0.5586, lr_0 = 6.5067e-04
Loss = 9.2973e-02, PNorm = 55.1114, GNorm = 0.6162, lr_0 = 6.5023e-04
Loss = 1.1888e-01, PNorm = 55.1246, GNorm = 0.9739, lr_0 = 6.4978e-04
Loss = 1.1129e-01, PNorm = 55.1420, GNorm = 0.8156, lr_0 = 6.4934e-04
Loss = 1.1587e-01, PNorm = 55.1674, GNorm = 1.3482, lr_0 = 6.4889e-04
Loss = 1.1033e-01, PNorm = 55.1831, GNorm = 1.0860, lr_0 = 6.4845e-04
Loss = 1.1603e-01, PNorm = 55.1989, GNorm = 0.8206, lr_0 = 6.4800e-04
Loss = 1.0997e-01, PNorm = 55.2094, GNorm = 0.8276, lr_0 = 6.4756e-04
Loss = 1.1068e-01, PNorm = 55.2267, GNorm = 0.6930, lr_0 = 6.4712e-04
Loss = 1.1984e-01, PNorm = 55.2458, GNorm = 1.0667, lr_0 = 6.4667e-04
Loss = 1.1130e-01, PNorm = 55.2638, GNorm = 2.5691, lr_0 = 6.4623e-04
Loss = 1.3025e-01, PNorm = 55.2811, GNorm = 1.4240, lr_0 = 6.4579e-04
Loss = 1.8379e-01, PNorm = 55.3025, GNorm = 3.1837, lr_0 = 6.4534e-04
Loss = 1.1473e-01, PNorm = 55.3293, GNorm = 0.9319, lr_0 = 6.4490e-04
Loss = 1.2519e-01, PNorm = 55.3581, GNorm = 0.7478, lr_0 = 6.4446e-04
Loss = 1.3003e-01, PNorm = 55.3748, GNorm = 0.7652, lr_0 = 6.4402e-04
Loss = 1.0647e-01, PNorm = 55.3900, GNorm = 1.0796, lr_0 = 6.4358e-04
Loss = 1.1271e-01, PNorm = 55.4098, GNorm = 0.6033, lr_0 = 6.4314e-04
Loss = 1.1547e-01, PNorm = 55.4250, GNorm = 0.6135, lr_0 = 6.4270e-04
Loss = 1.4007e-01, PNorm = 55.4413, GNorm = 2.0186, lr_0 = 6.4226e-04
Loss = 1.0588e-01, PNorm = 55.4619, GNorm = 0.6332, lr_0 = 6.4182e-04
Loss = 1.2122e-01, PNorm = 55.4795, GNorm = 3.1091, lr_0 = 6.4138e-04
Loss = 1.3343e-01, PNorm = 55.4884, GNorm = 1.1826, lr_0 = 6.4094e-04
Loss = 1.3400e-01, PNorm = 55.5052, GNorm = 0.9310, lr_0 = 6.4050e-04
Loss = 1.1113e-01, PNorm = 55.5257, GNorm = 0.6399, lr_0 = 6.4006e-04
Loss = 1.2627e-01, PNorm = 55.5435, GNorm = 0.6824, lr_0 = 6.3962e-04
Loss = 1.2897e-01, PNorm = 55.5586, GNorm = 0.5400, lr_0 = 6.3918e-04
Loss = 1.1563e-01, PNorm = 55.5770, GNorm = 0.7275, lr_0 = 6.3874e-04
Loss = 1.1081e-01, PNorm = 55.5976, GNorm = 0.7380, lr_0 = 6.3831e-04
Loss = 1.2869e-01, PNorm = 55.6221, GNorm = 1.1165, lr_0 = 6.3787e-04
Loss = 1.1347e-01, PNorm = 55.6364, GNorm = 1.2156, lr_0 = 6.3743e-04
Loss = 1.1117e-01, PNorm = 55.6500, GNorm = 0.9533, lr_0 = 6.3700e-04
Loss = 1.2164e-01, PNorm = 55.6695, GNorm = 0.7782, lr_0 = 6.3656e-04
Loss = 1.2795e-01, PNorm = 55.6856, GNorm = 0.9352, lr_0 = 6.3612e-04
Loss = 1.1497e-01, PNorm = 55.6974, GNorm = 0.7458, lr_0 = 6.3569e-04
Loss = 1.0508e-01, PNorm = 55.7079, GNorm = 0.9296, lr_0 = 6.3525e-04
Loss = 1.0723e-01, PNorm = 55.7241, GNorm = 0.4701, lr_0 = 6.3482e-04
Loss = 1.0848e-01, PNorm = 55.7389, GNorm = 1.9678, lr_0 = 6.3438e-04
Loss = 1.1386e-01, PNorm = 55.7505, GNorm = 0.7293, lr_0 = 6.3395e-04
Loss = 1.2674e-01, PNorm = 55.7649, GNorm = 1.3779, lr_0 = 6.3351e-04
Loss = 1.2712e-01, PNorm = 55.7813, GNorm = 0.8929, lr_0 = 6.3308e-04
Loss = 1.1377e-01, PNorm = 55.8023, GNorm = 0.5666, lr_0 = 6.3265e-04
Loss = 1.3448e-01, PNorm = 55.8182, GNorm = 1.9338, lr_0 = 6.3221e-04
Loss = 1.4258e-01, PNorm = 55.8412, GNorm = 1.1178, lr_0 = 6.3178e-04
Loss = 1.1721e-01, PNorm = 55.8598, GNorm = 1.3283, lr_0 = 6.3135e-04
Loss = 1.3745e-01, PNorm = 55.8836, GNorm = 1.2142, lr_0 = 6.3091e-04
Loss = 1.2053e-01, PNorm = 55.9023, GNorm = 0.6604, lr_0 = 6.3048e-04
Loss = 1.2327e-01, PNorm = 55.9215, GNorm = 0.6638, lr_0 = 6.3005e-04
Loss = 1.0441e-01, PNorm = 55.9329, GNorm = 0.6787, lr_0 = 6.2962e-04
Loss = 1.1969e-01, PNorm = 55.9417, GNorm = 2.0231, lr_0 = 6.2919e-04
Loss = 1.3892e-01, PNorm = 55.9550, GNorm = 1.6988, lr_0 = 6.2876e-04
Loss = 1.1870e-01, PNorm = 55.9759, GNorm = 0.9148, lr_0 = 6.2833e-04
Loss = 1.2475e-01, PNorm = 55.9982, GNorm = 0.7132, lr_0 = 6.2789e-04
Loss = 1.2803e-01, PNorm = 56.0137, GNorm = 0.7879, lr_0 = 6.2746e-04
Loss = 1.1750e-01, PNorm = 56.0226, GNorm = 0.9163, lr_0 = 6.2703e-04
Loss = 1.1271e-01, PNorm = 56.0382, GNorm = 0.5931, lr_0 = 6.2661e-04
Loss = 1.1804e-01, PNorm = 56.0579, GNorm = 0.9282, lr_0 = 6.2618e-04
Loss = 1.2357e-01, PNorm = 56.0668, GNorm = 0.7850, lr_0 = 6.2575e-04
Loss = 1.2463e-01, PNorm = 56.0772, GNorm = 0.6662, lr_0 = 6.2532e-04
Loss = 1.0890e-01, PNorm = 56.0958, GNorm = 0.6064, lr_0 = 6.2489e-04
Loss = 1.1662e-01, PNorm = 56.1146, GNorm = 0.7656, lr_0 = 6.2446e-04
Loss = 1.1492e-01, PNorm = 56.1288, GNorm = 0.7653, lr_0 = 6.2403e-04
Loss = 1.1187e-01, PNorm = 56.1485, GNorm = 0.6531, lr_0 = 6.2361e-04
Loss = 1.0853e-01, PNorm = 56.1632, GNorm = 0.5795, lr_0 = 6.2318e-04
Loss = 1.0398e-01, PNorm = 56.1725, GNorm = 1.2951, lr_0 = 6.2275e-04
Loss = 1.2994e-01, PNorm = 56.1847, GNorm = 0.9515, lr_0 = 6.2233e-04
Loss = 1.1755e-01, PNorm = 56.2006, GNorm = 1.2478, lr_0 = 6.2190e-04
Loss = 9.7351e-02, PNorm = 56.2184, GNorm = 0.9043, lr_0 = 6.2147e-04
Loss = 1.1086e-01, PNorm = 56.2314, GNorm = 0.8800, lr_0 = 6.2105e-04
Loss = 1.1474e-01, PNorm = 56.2423, GNorm = 0.6688, lr_0 = 6.2062e-04
Loss = 1.0646e-01, PNorm = 56.2564, GNorm = 0.5747, lr_0 = 6.2020e-04
Loss = 1.1067e-01, PNorm = 56.2731, GNorm = 0.7292, lr_0 = 6.1977e-04
Loss = 1.0210e-01, PNorm = 56.2860, GNorm = 0.3757, lr_0 = 6.1935e-04
Loss = 1.1758e-01, PNorm = 56.2992, GNorm = 1.0503, lr_0 = 6.1892e-04
Loss = 1.1128e-01, PNorm = 56.3106, GNorm = 0.7386, lr_0 = 6.1850e-04
Loss = 1.1700e-01, PNorm = 56.3227, GNorm = 0.7795, lr_0 = 6.1808e-04
Loss = 1.2575e-01, PNorm = 56.3386, GNorm = 1.7212, lr_0 = 6.1765e-04
Loss = 1.0240e-01, PNorm = 56.3579, GNorm = 0.9025, lr_0 = 6.1723e-04
Loss = 9.9376e-02, PNorm = 56.3754, GNorm = 0.5778, lr_0 = 6.1681e-04
Loss = 1.1697e-01, PNorm = 56.3896, GNorm = 0.7892, lr_0 = 6.1638e-04
Loss = 1.0079e-01, PNorm = 56.4012, GNorm = 0.5256, lr_0 = 6.1596e-04
Loss = 1.2984e-01, PNorm = 56.4157, GNorm = 0.8388, lr_0 = 6.1554e-04
Loss = 1.1095e-01, PNorm = 56.4268, GNorm = 0.6444, lr_0 = 6.1512e-04
Loss = 1.2076e-01, PNorm = 56.4371, GNorm = 0.7771, lr_0 = 6.1470e-04
Loss = 1.2086e-01, PNorm = 56.4527, GNorm = 0.9257, lr_0 = 6.1428e-04
Loss = 1.2703e-01, PNorm = 56.4686, GNorm = 1.4086, lr_0 = 6.1385e-04
Loss = 1.1391e-01, PNorm = 56.4825, GNorm = 0.8418, lr_0 = 6.1343e-04
Loss = 1.1737e-01, PNorm = 56.4985, GNorm = 0.4097, lr_0 = 6.1301e-04
Loss = 1.1868e-01, PNorm = 56.5138, GNorm = 0.8598, lr_0 = 6.1259e-04
Loss = 1.2824e-01, PNorm = 56.5365, GNorm = 1.3210, lr_0 = 6.1217e-04
Loss = 1.0546e-01, PNorm = 56.5581, GNorm = 0.7605, lr_0 = 6.1175e-04
Loss = 1.1623e-01, PNorm = 56.5760, GNorm = 1.0199, lr_0 = 6.1134e-04
Loss = 1.0041e-01, PNorm = 56.5944, GNorm = 0.8679, lr_0 = 6.1092e-04
Loss = 1.2297e-01, PNorm = 56.6116, GNorm = 0.6126, lr_0 = 6.1050e-04
Validation mae = 0.428033
Epoch 8
Loss = 1.0211e-01, PNorm = 56.6304, GNorm = 0.5309, lr_0 = 6.1008e-04
Loss = 9.4690e-02, PNorm = 56.6447, GNorm = 0.7210, lr_0 = 6.0966e-04
Loss = 8.9186e-02, PNorm = 56.6545, GNorm = 1.5152, lr_0 = 6.0924e-04
Loss = 1.1047e-01, PNorm = 56.6659, GNorm = 0.9161, lr_0 = 6.0883e-04
Loss = 1.1423e-01, PNorm = 56.6727, GNorm = 0.7127, lr_0 = 6.0841e-04
Loss = 1.0506e-01, PNorm = 56.6854, GNorm = 0.7825, lr_0 = 6.0799e-04
Loss = 1.0871e-01, PNorm = 56.7029, GNorm = 0.5505, lr_0 = 6.0758e-04
Loss = 1.0288e-01, PNorm = 56.7188, GNorm = 0.5506, lr_0 = 6.0716e-04
Loss = 1.1427e-01, PNorm = 56.7300, GNorm = 0.9139, lr_0 = 6.0674e-04
Loss = 1.2535e-01, PNorm = 56.7475, GNorm = 0.7054, lr_0 = 6.0633e-04
Loss = 1.4177e-01, PNorm = 56.7651, GNorm = 2.0213, lr_0 = 6.0591e-04
Loss = 1.1025e-01, PNorm = 56.7836, GNorm = 1.7289, lr_0 = 6.0550e-04
Loss = 1.0539e-01, PNorm = 56.8012, GNorm = 0.6578, lr_0 = 6.0508e-04
Loss = 1.2333e-01, PNorm = 56.8183, GNorm = 0.5186, lr_0 = 6.0467e-04
Loss = 9.7318e-02, PNorm = 56.8352, GNorm = 0.5926, lr_0 = 6.0425e-04
Loss = 9.6843e-02, PNorm = 56.8510, GNorm = 0.8356, lr_0 = 6.0384e-04
Loss = 1.0454e-01, PNorm = 56.8732, GNorm = 0.6485, lr_0 = 6.0343e-04
Loss = 1.1610e-01, PNorm = 56.8965, GNorm = 0.9745, lr_0 = 6.0301e-04
Loss = 9.0435e-02, PNorm = 56.9135, GNorm = 1.2125, lr_0 = 6.0260e-04
Loss = 1.0517e-01, PNorm = 56.9235, GNorm = 0.7606, lr_0 = 6.0219e-04
Loss = 1.0096e-01, PNorm = 56.9351, GNorm = 1.1038, lr_0 = 6.0178e-04
Loss = 1.2038e-01, PNorm = 56.9526, GNorm = 1.8944, lr_0 = 6.0136e-04
Loss = 1.0270e-01, PNorm = 56.9715, GNorm = 1.1236, lr_0 = 6.0095e-04
Loss = 1.1289e-01, PNorm = 56.9883, GNorm = 0.9663, lr_0 = 6.0054e-04
Loss = 1.0515e-01, PNorm = 57.0054, GNorm = 1.5342, lr_0 = 6.0013e-04
Loss = 1.0842e-01, PNorm = 57.0247, GNorm = 1.4490, lr_0 = 5.9972e-04
Loss = 1.1787e-01, PNorm = 57.0491, GNorm = 1.2896, lr_0 = 5.9931e-04
Loss = 1.2345e-01, PNorm = 57.0651, GNorm = 1.0208, lr_0 = 5.9890e-04
Loss = 1.3562e-01, PNorm = 57.0890, GNorm = 0.5353, lr_0 = 5.9849e-04
Loss = 1.1172e-01, PNorm = 57.1065, GNorm = 0.5931, lr_0 = 5.9808e-04
Loss = 1.0281e-01, PNorm = 57.1252, GNorm = 0.7407, lr_0 = 5.9767e-04
Loss = 1.2214e-01, PNorm = 57.1437, GNorm = 0.8038, lr_0 = 5.9726e-04
Loss = 9.7386e-02, PNorm = 57.1657, GNorm = 0.5152, lr_0 = 5.9685e-04
Loss = 1.1149e-01, PNorm = 57.1743, GNorm = 0.6111, lr_0 = 5.9644e-04
Loss = 9.1215e-02, PNorm = 57.1865, GNorm = 0.4963, lr_0 = 5.9603e-04
Loss = 9.4861e-02, PNorm = 57.1998, GNorm = 0.9823, lr_0 = 5.9562e-04
Loss = 9.1283e-02, PNorm = 57.2117, GNorm = 0.7551, lr_0 = 5.9521e-04
Loss = 1.0134e-01, PNorm = 57.2251, GNorm = 0.6531, lr_0 = 5.9481e-04
Loss = 9.7194e-02, PNorm = 57.2389, GNorm = 0.6793, lr_0 = 5.9440e-04
Loss = 1.2022e-01, PNorm = 57.2510, GNorm = 0.8851, lr_0 = 5.9399e-04
Loss = 1.0279e-01, PNorm = 57.2679, GNorm = 0.6047, lr_0 = 5.9358e-04
Loss = 1.0593e-01, PNorm = 57.2784, GNorm = 0.5987, lr_0 = 5.9318e-04
Loss = 1.0660e-01, PNorm = 57.2906, GNorm = 0.7200, lr_0 = 5.9277e-04
Loss = 1.2192e-01, PNorm = 57.3063, GNorm = 0.4958, lr_0 = 5.9236e-04
Loss = 1.1587e-01, PNorm = 57.3321, GNorm = 1.0663, lr_0 = 5.9196e-04
Loss = 1.1520e-01, PNorm = 57.3501, GNorm = 0.6264, lr_0 = 5.9155e-04
Loss = 1.1548e-01, PNorm = 57.3668, GNorm = 1.0806, lr_0 = 5.9115e-04
Loss = 1.1110e-01, PNorm = 57.3808, GNorm = 0.7888, lr_0 = 5.9074e-04
Loss = 1.2068e-01, PNorm = 57.3906, GNorm = 0.6620, lr_0 = 5.9034e-04
Loss = 9.8588e-02, PNorm = 57.3987, GNorm = 0.7504, lr_0 = 5.8993e-04
Loss = 1.1571e-01, PNorm = 57.4154, GNorm = 1.3520, lr_0 = 5.8953e-04
Loss = 1.1957e-01, PNorm = 57.4337, GNorm = 1.9013, lr_0 = 5.8913e-04
Loss = 1.0009e-01, PNorm = 57.4515, GNorm = 0.8069, lr_0 = 5.8872e-04
Loss = 1.1144e-01, PNorm = 57.4737, GNorm = 0.7610, lr_0 = 5.8832e-04
Loss = 9.7014e-02, PNorm = 57.4909, GNorm = 0.5506, lr_0 = 5.8792e-04
Loss = 1.0258e-01, PNorm = 57.5068, GNorm = 0.6287, lr_0 = 5.8751e-04
Loss = 1.0936e-01, PNorm = 57.5214, GNorm = 1.2144, lr_0 = 5.8711e-04
Loss = 1.0326e-01, PNorm = 57.5319, GNorm = 0.8663, lr_0 = 5.8671e-04
Loss = 1.1402e-01, PNorm = 57.5464, GNorm = 1.0389, lr_0 = 5.8631e-04
Loss = 1.0154e-01, PNorm = 57.5623, GNorm = 0.6001, lr_0 = 5.8591e-04
Loss = 1.0728e-01, PNorm = 57.5834, GNorm = 0.6279, lr_0 = 5.8550e-04
Loss = 1.2360e-01, PNorm = 57.5983, GNorm = 0.6910, lr_0 = 5.8510e-04
Loss = 9.4348e-02, PNorm = 57.6147, GNorm = 0.5332, lr_0 = 5.8470e-04
Loss = 1.1401e-01, PNorm = 57.6342, GNorm = 0.6542, lr_0 = 5.8430e-04
Loss = 1.3123e-01, PNorm = 57.6502, GNorm = 0.6409, lr_0 = 5.8390e-04
Loss = 1.1458e-01, PNorm = 57.6666, GNorm = 0.7885, lr_0 = 5.8350e-04
Loss = 1.0309e-01, PNorm = 57.6764, GNorm = 0.5876, lr_0 = 5.8310e-04
Loss = 1.2542e-01, PNorm = 57.6928, GNorm = 0.6722, lr_0 = 5.8270e-04
Loss = 9.9367e-02, PNorm = 57.7055, GNorm = 1.2599, lr_0 = 5.8230e-04
Loss = 1.1163e-01, PNorm = 57.7156, GNorm = 1.1569, lr_0 = 5.8190e-04
Loss = 1.0999e-01, PNorm = 57.7302, GNorm = 0.9295, lr_0 = 5.8151e-04
Loss = 1.2297e-01, PNorm = 57.7486, GNorm = 1.1267, lr_0 = 5.8111e-04
Loss = 9.5545e-02, PNorm = 57.7609, GNorm = 1.0622, lr_0 = 5.8071e-04
Loss = 1.2317e-01, PNorm = 57.7743, GNorm = 0.7003, lr_0 = 5.8031e-04
Loss = 1.0047e-01, PNorm = 57.7867, GNorm = 0.8447, lr_0 = 5.7991e-04
Loss = 1.1167e-01, PNorm = 57.8021, GNorm = 0.6900, lr_0 = 5.7952e-04
Loss = 1.0542e-01, PNorm = 57.8152, GNorm = 0.6310, lr_0 = 5.7912e-04
Loss = 1.0616e-01, PNorm = 57.8269, GNorm = 0.8125, lr_0 = 5.7872e-04
Loss = 1.0680e-01, PNorm = 57.8423, GNorm = 0.6473, lr_0 = 5.7833e-04
Loss = 1.2670e-01, PNorm = 57.8562, GNorm = 1.8760, lr_0 = 5.7793e-04
Loss = 1.3454e-01, PNorm = 57.8758, GNorm = 1.2820, lr_0 = 5.7753e-04
Loss = 1.4500e-01, PNorm = 57.8946, GNorm = 1.5941, lr_0 = 5.7714e-04
Loss = 1.1974e-01, PNorm = 57.9118, GNorm = 1.4498, lr_0 = 5.7674e-04
Loss = 1.0532e-01, PNorm = 57.9286, GNorm = 0.7646, lr_0 = 5.7635e-04
Loss = 1.2384e-01, PNorm = 57.9457, GNorm = 0.8016, lr_0 = 5.7595e-04
Loss = 1.1184e-01, PNorm = 57.9583, GNorm = 0.8843, lr_0 = 5.7556e-04
Loss = 1.0644e-01, PNorm = 57.9713, GNorm = 0.6495, lr_0 = 5.7516e-04
Loss = 8.5088e-02, PNorm = 57.9857, GNorm = 0.6122, lr_0 = 5.7477e-04
Loss = 1.1037e-01, PNorm = 58.0002, GNorm = 0.6593, lr_0 = 5.7438e-04
Loss = 1.1415e-01, PNorm = 58.0155, GNorm = 0.6217, lr_0 = 5.7398e-04
Loss = 8.9119e-02, PNorm = 58.0316, GNorm = 0.6493, lr_0 = 5.7359e-04
Loss = 9.8867e-02, PNorm = 58.0467, GNorm = 0.7050, lr_0 = 5.7320e-04
Loss = 1.1646e-01, PNorm = 58.0615, GNorm = 0.6945, lr_0 = 5.7280e-04
Loss = 1.2896e-01, PNorm = 58.0757, GNorm = 0.6529, lr_0 = 5.7241e-04
Loss = 1.3703e-01, PNorm = 58.0885, GNorm = 0.7680, lr_0 = 5.7202e-04
Loss = 1.0055e-01, PNorm = 58.1006, GNorm = 0.6895, lr_0 = 5.7163e-04
Loss = 1.1301e-01, PNorm = 58.1180, GNorm = 1.3498, lr_0 = 5.7124e-04
Loss = 9.7146e-02, PNorm = 58.1368, GNorm = 0.4561, lr_0 = 5.7084e-04
Loss = 1.0952e-01, PNorm = 58.1457, GNorm = 1.1796, lr_0 = 5.7045e-04
Loss = 9.7182e-02, PNorm = 58.1545, GNorm = 1.0838, lr_0 = 5.7006e-04
Loss = 1.1261e-01, PNorm = 58.1689, GNorm = 1.3439, lr_0 = 5.6967e-04
Loss = 1.1508e-01, PNorm = 58.1845, GNorm = 0.8111, lr_0 = 5.6928e-04
Loss = 1.1599e-01, PNorm = 58.1966, GNorm = 0.7026, lr_0 = 5.6889e-04
Loss = 1.0786e-01, PNorm = 58.2100, GNorm = 0.5683, lr_0 = 5.6850e-04
Loss = 8.8516e-02, PNorm = 58.2249, GNorm = 0.4629, lr_0 = 5.6811e-04
Loss = 1.2961e-01, PNorm = 58.2414, GNorm = 0.4832, lr_0 = 5.6772e-04
Loss = 1.1133e-01, PNorm = 58.2554, GNorm = 0.9079, lr_0 = 5.6733e-04
Loss = 1.0704e-01, PNorm = 58.2687, GNorm = 1.0766, lr_0 = 5.6695e-04
Loss = 1.1092e-01, PNorm = 58.2791, GNorm = 0.5020, lr_0 = 5.6656e-04
Loss = 1.1081e-01, PNorm = 58.2957, GNorm = 0.6525, lr_0 = 5.6617e-04
Loss = 9.3799e-02, PNorm = 58.3085, GNorm = 0.9088, lr_0 = 5.6578e-04
Loss = 1.0552e-01, PNorm = 58.3213, GNorm = 0.8635, lr_0 = 5.6539e-04
Loss = 1.1837e-01, PNorm = 58.3327, GNorm = 1.0020, lr_0 = 5.6501e-04
Loss = 1.2156e-01, PNorm = 58.3498, GNorm = 0.7303, lr_0 = 5.6462e-04
Loss = 9.7166e-02, PNorm = 58.3658, GNorm = 1.0262, lr_0 = 5.6423e-04
Loss = 9.4012e-02, PNorm = 58.3803, GNorm = 0.9572, lr_0 = 5.6385e-04
Loss = 1.0621e-01, PNorm = 58.3881, GNorm = 0.7902, lr_0 = 5.6346e-04
Loss = 1.1942e-01, PNorm = 58.3946, GNorm = 0.6493, lr_0 = 5.6307e-04
Loss = 1.3451e-01, PNorm = 58.4110, GNorm = 0.7024, lr_0 = 5.6269e-04
Loss = 1.2094e-01, PNorm = 58.4289, GNorm = 1.0536, lr_0 = 5.6230e-04
Validation mae = 0.414989
Epoch 9
Loss = 9.3106e-02, PNorm = 58.4500, GNorm = 0.4505, lr_0 = 5.6192e-04
Loss = 9.8867e-02, PNorm = 58.4685, GNorm = 0.6373, lr_0 = 5.6153e-04
Loss = 1.0765e-01, PNorm = 58.4827, GNorm = 0.4703, lr_0 = 5.6115e-04
Loss = 9.8278e-02, PNorm = 58.4949, GNorm = 0.9688, lr_0 = 5.6076e-04
Loss = 9.5922e-02, PNorm = 58.5056, GNorm = 0.4729, lr_0 = 5.6038e-04
Loss = 1.1045e-01, PNorm = 58.5187, GNorm = 0.5513, lr_0 = 5.6000e-04
Loss = 9.5541e-02, PNorm = 58.5292, GNorm = 1.3657, lr_0 = 5.5961e-04
Loss = 8.7079e-02, PNorm = 58.5385, GNorm = 0.7415, lr_0 = 5.5923e-04
Loss = 8.6825e-02, PNorm = 58.5477, GNorm = 0.6401, lr_0 = 5.5885e-04
Loss = 1.0980e-01, PNorm = 58.5595, GNorm = 0.5539, lr_0 = 5.5846e-04
Loss = 9.1648e-02, PNorm = 58.5730, GNorm = 0.6914, lr_0 = 5.5808e-04
Loss = 9.3865e-02, PNorm = 58.5890, GNorm = 0.7423, lr_0 = 5.5770e-04
Loss = 9.2100e-02, PNorm = 58.6036, GNorm = 0.5595, lr_0 = 5.5732e-04
Loss = 1.0519e-01, PNorm = 58.6178, GNorm = 1.1454, lr_0 = 5.5693e-04
Loss = 9.7295e-02, PNorm = 58.6389, GNorm = 0.4477, lr_0 = 5.5655e-04
Loss = 9.5271e-02, PNorm = 58.6597, GNorm = 0.8907, lr_0 = 5.5617e-04
Loss = 1.1048e-01, PNorm = 58.6708, GNorm = 1.0588, lr_0 = 5.5579e-04
Loss = 9.8709e-02, PNorm = 58.6852, GNorm = 0.7137, lr_0 = 5.5541e-04
Loss = 1.1042e-01, PNorm = 58.6983, GNorm = 1.0563, lr_0 = 5.5503e-04
Loss = 7.7645e-02, PNorm = 58.7116, GNorm = 0.6194, lr_0 = 5.5465e-04
Loss = 1.1264e-01, PNorm = 58.7255, GNorm = 1.2208, lr_0 = 5.5427e-04
Loss = 1.0364e-01, PNorm = 58.7413, GNorm = 0.7765, lr_0 = 5.5389e-04
Loss = 1.0958e-01, PNorm = 58.7556, GNorm = 1.7446, lr_0 = 5.5351e-04
Loss = 1.0497e-01, PNorm = 58.7774, GNorm = 1.3369, lr_0 = 5.5313e-04
Loss = 9.5971e-02, PNorm = 58.7982, GNorm = 0.8299, lr_0 = 5.5275e-04
Loss = 1.0243e-01, PNorm = 58.8168, GNorm = 1.2549, lr_0 = 5.5237e-04
Loss = 9.6658e-02, PNorm = 58.8308, GNorm = 0.9103, lr_0 = 5.5199e-04
Loss = 1.0577e-01, PNorm = 58.8401, GNorm = 1.0662, lr_0 = 5.5162e-04
Loss = 1.0347e-01, PNorm = 58.8536, GNorm = 1.0661, lr_0 = 5.5124e-04
Loss = 1.0592e-01, PNorm = 58.8695, GNorm = 0.9705, lr_0 = 5.5086e-04
Loss = 8.7406e-02, PNorm = 58.8819, GNorm = 0.5973, lr_0 = 5.5048e-04
Loss = 1.2171e-01, PNorm = 58.8953, GNorm = 0.9401, lr_0 = 5.5011e-04
Loss = 9.6428e-02, PNorm = 58.9050, GNorm = 0.9296, lr_0 = 5.4973e-04
Loss = 8.8973e-02, PNorm = 58.9182, GNorm = 1.8144, lr_0 = 5.4935e-04
Loss = 1.1085e-01, PNorm = 58.9318, GNorm = 0.7102, lr_0 = 5.4898e-04
Loss = 1.1153e-01, PNorm = 58.9482, GNorm = 1.0073, lr_0 = 5.4860e-04
Loss = 1.1101e-01, PNorm = 58.9652, GNorm = 0.4769, lr_0 = 5.4822e-04
Loss = 9.7258e-02, PNorm = 58.9791, GNorm = 0.6204, lr_0 = 5.4785e-04
Loss = 1.0004e-01, PNorm = 58.9927, GNorm = 0.9085, lr_0 = 5.4747e-04
Loss = 1.0028e-01, PNorm = 59.0030, GNorm = 0.6344, lr_0 = 5.4710e-04
Loss = 8.8935e-02, PNorm = 59.0177, GNorm = 0.6203, lr_0 = 5.4672e-04
Loss = 1.0725e-01, PNorm = 59.0370, GNorm = 0.7936, lr_0 = 5.4635e-04
Loss = 9.3553e-02, PNorm = 59.0526, GNorm = 0.7511, lr_0 = 5.4597e-04
Loss = 1.0117e-01, PNorm = 59.0646, GNorm = 0.9005, lr_0 = 5.4560e-04
Loss = 9.9091e-02, PNorm = 59.0766, GNorm = 0.4960, lr_0 = 5.4523e-04
Loss = 9.1399e-02, PNorm = 59.0902, GNorm = 0.5820, lr_0 = 5.4485e-04
Loss = 1.1915e-01, PNorm = 59.1041, GNorm = 0.5122, lr_0 = 5.4448e-04
Loss = 1.0385e-01, PNorm = 59.1196, GNorm = 0.7632, lr_0 = 5.4411e-04
Loss = 9.3259e-02, PNorm = 59.1369, GNorm = 0.5723, lr_0 = 5.4373e-04
Loss = 8.1474e-02, PNorm = 59.1527, GNorm = 0.6276, lr_0 = 5.4336e-04
Loss = 8.5424e-02, PNorm = 59.1643, GNorm = 1.1041, lr_0 = 5.4299e-04
Loss = 9.0001e-02, PNorm = 59.1721, GNorm = 1.0909, lr_0 = 5.4262e-04
Loss = 9.7570e-02, PNorm = 59.1844, GNorm = 1.1498, lr_0 = 5.4225e-04
Loss = 9.6444e-02, PNorm = 59.1969, GNorm = 0.6310, lr_0 = 5.4187e-04
Loss = 1.0313e-01, PNorm = 59.2109, GNorm = 1.4869, lr_0 = 5.4150e-04
Loss = 1.0176e-01, PNorm = 59.2288, GNorm = 0.5476, lr_0 = 5.4113e-04
Loss = 1.0425e-01, PNorm = 59.2505, GNorm = 0.6044, lr_0 = 5.4076e-04
Loss = 1.1072e-01, PNorm = 59.2679, GNorm = 0.6003, lr_0 = 5.4039e-04
Loss = 1.0025e-01, PNorm = 59.2798, GNorm = 0.5340, lr_0 = 5.4002e-04
Loss = 1.0498e-01, PNorm = 59.2904, GNorm = 1.0362, lr_0 = 5.3965e-04
Loss = 1.0538e-01, PNorm = 59.3031, GNorm = 0.8245, lr_0 = 5.3928e-04
Loss = 1.1452e-01, PNorm = 59.3175, GNorm = 0.6178, lr_0 = 5.3891e-04
Loss = 1.1107e-01, PNorm = 59.3344, GNorm = 0.7460, lr_0 = 5.3854e-04
Loss = 9.4020e-02, PNorm = 59.3476, GNorm = 0.6138, lr_0 = 5.3817e-04
Loss = 1.1118e-01, PNorm = 59.3591, GNorm = 0.8567, lr_0 = 5.3781e-04
Loss = 9.8448e-02, PNorm = 59.3755, GNorm = 0.8646, lr_0 = 5.3744e-04
Loss = 1.0589e-01, PNorm = 59.3887, GNorm = 0.8338, lr_0 = 5.3707e-04
Loss = 1.2009e-01, PNorm = 59.4074, GNorm = 0.6364, lr_0 = 5.3670e-04
Loss = 9.6294e-02, PNorm = 59.4219, GNorm = 0.7654, lr_0 = 5.3633e-04
Loss = 8.6420e-02, PNorm = 59.4336, GNorm = 0.6531, lr_0 = 5.3597e-04
Loss = 9.4737e-02, PNorm = 59.4466, GNorm = 0.7079, lr_0 = 5.3560e-04
Loss = 9.1365e-02, PNorm = 59.4599, GNorm = 0.5819, lr_0 = 5.3523e-04
Loss = 9.3721e-02, PNorm = 59.4698, GNorm = 0.6002, lr_0 = 5.3486e-04
Loss = 1.0231e-01, PNorm = 59.4871, GNorm = 0.7073, lr_0 = 5.3450e-04
Loss = 1.0869e-01, PNorm = 59.5052, GNorm = 1.2158, lr_0 = 5.3413e-04
Loss = 1.0304e-01, PNorm = 59.5220, GNorm = 1.1368, lr_0 = 5.3377e-04
Loss = 9.7793e-02, PNorm = 59.5400, GNorm = 0.9558, lr_0 = 5.3340e-04
Loss = 1.0233e-01, PNorm = 59.5554, GNorm = 0.6267, lr_0 = 5.3304e-04
Loss = 9.5318e-02, PNorm = 59.5714, GNorm = 0.6930, lr_0 = 5.3267e-04
Loss = 1.0343e-01, PNorm = 59.5836, GNorm = 1.3821, lr_0 = 5.3231e-04
Loss = 9.4762e-02, PNorm = 59.5899, GNorm = 1.1944, lr_0 = 5.3194e-04
Loss = 1.0151e-01, PNorm = 59.5996, GNorm = 0.6568, lr_0 = 5.3158e-04
Loss = 9.8700e-02, PNorm = 59.6116, GNorm = 1.3991, lr_0 = 5.3121e-04
Loss = 9.8713e-02, PNorm = 59.6228, GNorm = 1.1208, lr_0 = 5.3085e-04
Loss = 1.1845e-01, PNorm = 59.6336, GNorm = 0.8240, lr_0 = 5.3048e-04
Loss = 1.0144e-01, PNorm = 59.6475, GNorm = 0.7588, lr_0 = 5.3012e-04
Loss = 1.1174e-01, PNorm = 59.6649, GNorm = 1.0475, lr_0 = 5.2976e-04
Loss = 1.0527e-01, PNorm = 59.6764, GNorm = 1.0575, lr_0 = 5.2939e-04
Loss = 9.6527e-02, PNorm = 59.6914, GNorm = 0.6238, lr_0 = 5.2903e-04
Loss = 1.0536e-01, PNorm = 59.7070, GNorm = 1.3148, lr_0 = 5.2867e-04
Loss = 1.0865e-01, PNorm = 59.7182, GNorm = 1.1097, lr_0 = 5.2831e-04
Loss = 1.1343e-01, PNorm = 59.7267, GNorm = 0.5991, lr_0 = 5.2795e-04
Loss = 1.0226e-01, PNorm = 59.7404, GNorm = 0.5223, lr_0 = 5.2758e-04
Loss = 1.0138e-01, PNorm = 59.7569, GNorm = 0.5963, lr_0 = 5.2722e-04
Loss = 9.7389e-02, PNorm = 59.7707, GNorm = 0.5162, lr_0 = 5.2686e-04
Loss = 1.1622e-01, PNorm = 59.7817, GNorm = 0.5715, lr_0 = 5.2650e-04
Loss = 1.0902e-01, PNorm = 59.7944, GNorm = 0.6332, lr_0 = 5.2614e-04
Loss = 9.7529e-02, PNorm = 59.8051, GNorm = 0.6644, lr_0 = 5.2578e-04
Loss = 1.0132e-01, PNorm = 59.8183, GNorm = 0.7121, lr_0 = 5.2542e-04
Loss = 1.1461e-01, PNorm = 59.8320, GNorm = 1.3336, lr_0 = 5.2506e-04
Loss = 1.0005e-01, PNorm = 59.8482, GNorm = 1.0352, lr_0 = 5.2470e-04
Loss = 9.1375e-02, PNorm = 59.8605, GNorm = 0.7459, lr_0 = 5.2434e-04
Loss = 9.6295e-02, PNorm = 59.8688, GNorm = 0.6093, lr_0 = 5.2398e-04
Loss = 1.0824e-01, PNorm = 59.8834, GNorm = 0.7047, lr_0 = 5.2362e-04
Loss = 9.3444e-02, PNorm = 59.8997, GNorm = 0.4776, lr_0 = 5.2326e-04
Loss = 8.8203e-02, PNorm = 59.9147, GNorm = 0.6108, lr_0 = 5.2290e-04
Loss = 8.7444e-02, PNorm = 59.9287, GNorm = 0.7051, lr_0 = 5.2255e-04
Loss = 1.0330e-01, PNorm = 59.9423, GNorm = 1.7894, lr_0 = 5.2219e-04
Loss = 9.3125e-02, PNorm = 59.9514, GNorm = 0.7283, lr_0 = 5.2183e-04
Loss = 1.0765e-01, PNorm = 59.9590, GNorm = 0.9951, lr_0 = 5.2147e-04
Loss = 1.0484e-01, PNorm = 59.9737, GNorm = 0.7943, lr_0 = 5.2112e-04
Loss = 1.0886e-01, PNorm = 59.9877, GNorm = 1.2730, lr_0 = 5.2076e-04
Loss = 1.1029e-01, PNorm = 60.0025, GNorm = 1.5276, lr_0 = 5.2040e-04
Loss = 1.0220e-01, PNorm = 60.0143, GNorm = 0.4090, lr_0 = 5.2005e-04
Loss = 9.9188e-02, PNorm = 60.0262, GNorm = 0.6795, lr_0 = 5.1969e-04
Loss = 1.1099e-01, PNorm = 60.0352, GNorm = 1.3174, lr_0 = 5.1933e-04
Loss = 1.1822e-01, PNorm = 60.0467, GNorm = 0.8359, lr_0 = 5.1898e-04
Loss = 9.5630e-02, PNorm = 60.0604, GNorm = 0.4977, lr_0 = 5.1862e-04
Loss = 1.0704e-01, PNorm = 60.0675, GNorm = 0.5445, lr_0 = 5.1827e-04
Loss = 1.0124e-01, PNorm = 60.0769, GNorm = 0.5725, lr_0 = 5.1791e-04
Validation mae = 0.404676
Epoch 10
Loss = 8.1005e-02, PNorm = 60.0858, GNorm = 0.5411, lr_0 = 5.1756e-04
Loss = 8.6502e-02, PNorm = 60.0964, GNorm = 0.6822, lr_0 = 5.1720e-04
Loss = 9.0789e-02, PNorm = 60.1080, GNorm = 0.5766, lr_0 = 5.1685e-04
Loss = 8.2233e-02, PNorm = 60.1235, GNorm = 0.6222, lr_0 = 5.1649e-04
Loss = 9.1071e-02, PNorm = 60.1372, GNorm = 0.5825, lr_0 = 5.1614e-04
Loss = 7.8774e-02, PNorm = 60.1511, GNorm = 0.5334, lr_0 = 5.1579e-04
Loss = 8.3607e-02, PNorm = 60.1656, GNorm = 0.6194, lr_0 = 5.1543e-04
Loss = 9.7139e-02, PNorm = 60.1786, GNorm = 0.5275, lr_0 = 5.1508e-04
Loss = 8.3928e-02, PNorm = 60.1924, GNorm = 0.7084, lr_0 = 5.1473e-04
Loss = 8.8655e-02, PNorm = 60.2082, GNorm = 0.7216, lr_0 = 5.1437e-04
Loss = 9.0603e-02, PNorm = 60.2237, GNorm = 0.6349, lr_0 = 5.1402e-04
Loss = 1.1583e-01, PNorm = 60.2381, GNorm = 1.2157, lr_0 = 5.1367e-04
Loss = 1.0248e-01, PNorm = 60.2548, GNorm = 1.0588, lr_0 = 5.1332e-04
Loss = 8.7695e-02, PNorm = 60.2676, GNorm = 0.5362, lr_0 = 5.1297e-04
Loss = 1.1107e-01, PNorm = 60.2830, GNorm = 1.2410, lr_0 = 5.1262e-04
Loss = 9.9826e-02, PNorm = 60.2992, GNorm = 1.3160, lr_0 = 5.1226e-04
Loss = 9.2090e-02, PNorm = 60.3082, GNorm = 0.5854, lr_0 = 5.1191e-04
Loss = 8.9977e-02, PNorm = 60.3215, GNorm = 1.0601, lr_0 = 5.1156e-04
Loss = 9.9287e-02, PNorm = 60.3388, GNorm = 0.6635, lr_0 = 5.1121e-04
Loss = 9.4563e-02, PNorm = 60.3571, GNorm = 0.8954, lr_0 = 5.1086e-04
Loss = 1.0396e-01, PNorm = 60.3735, GNorm = 0.6934, lr_0 = 5.1051e-04
Loss = 9.9090e-02, PNorm = 60.3831, GNorm = 0.7795, lr_0 = 5.1016e-04
Loss = 8.7352e-02, PNorm = 60.3932, GNorm = 0.8170, lr_0 = 5.0981e-04
Loss = 1.0021e-01, PNorm = 60.4063, GNorm = 0.8809, lr_0 = 5.0946e-04
Loss = 9.4418e-02, PNorm = 60.4231, GNorm = 0.6959, lr_0 = 5.0911e-04
Loss = 8.5350e-02, PNorm = 60.4372, GNorm = 0.5788, lr_0 = 5.0877e-04
Loss = 8.9530e-02, PNorm = 60.4478, GNorm = 0.8464, lr_0 = 5.0842e-04
Loss = 9.4148e-02, PNorm = 60.4598, GNorm = 0.5247, lr_0 = 5.0807e-04
Loss = 8.0067e-02, PNorm = 60.4755, GNorm = 0.5153, lr_0 = 5.0772e-04
Loss = 8.9906e-02, PNorm = 60.4907, GNorm = 0.5277, lr_0 = 5.0737e-04
Loss = 9.0089e-02, PNorm = 60.5044, GNorm = 0.7979, lr_0 = 5.0703e-04
Loss = 9.4048e-02, PNorm = 60.5182, GNorm = 0.6506, lr_0 = 5.0668e-04
Loss = 1.0115e-01, PNorm = 60.5325, GNorm = 0.8564, lr_0 = 5.0633e-04
Loss = 8.7488e-02, PNorm = 60.5460, GNorm = 1.5442, lr_0 = 5.0598e-04
Loss = 9.4344e-02, PNorm = 60.5576, GNorm = 0.4768, lr_0 = 5.0564e-04
Loss = 9.6791e-02, PNorm = 60.5730, GNorm = 0.5565, lr_0 = 5.0529e-04
Loss = 9.5465e-02, PNorm = 60.5842, GNorm = 0.6435, lr_0 = 5.0494e-04
Loss = 9.5058e-02, PNorm = 60.5905, GNorm = 1.0591, lr_0 = 5.0460e-04
Loss = 8.8626e-02, PNorm = 60.6047, GNorm = 0.9207, lr_0 = 5.0425e-04
Loss = 9.7674e-02, PNorm = 60.6208, GNorm = 0.7217, lr_0 = 5.0391e-04
Loss = 9.5456e-02, PNorm = 60.6355, GNorm = 0.7278, lr_0 = 5.0356e-04
Loss = 9.3672e-02, PNorm = 60.6440, GNorm = 1.1839, lr_0 = 5.0322e-04
Loss = 9.8523e-02, PNorm = 60.6513, GNorm = 1.2729, lr_0 = 5.0287e-04
Loss = 9.5194e-02, PNorm = 60.6619, GNorm = 0.5584, lr_0 = 5.0253e-04
Loss = 8.2414e-02, PNorm = 60.6753, GNorm = 0.6931, lr_0 = 5.0218e-04
Loss = 9.2095e-02, PNorm = 60.6891, GNorm = 0.5660, lr_0 = 5.0184e-04
Loss = 9.6911e-02, PNorm = 60.7000, GNorm = 0.5052, lr_0 = 5.0150e-04
Loss = 9.4712e-02, PNorm = 60.7076, GNorm = 0.6484, lr_0 = 5.0115e-04
Loss = 7.4850e-02, PNorm = 60.7150, GNorm = 1.1680, lr_0 = 5.0081e-04
Loss = 8.0522e-02, PNorm = 60.7307, GNorm = 0.6300, lr_0 = 5.0047e-04
Loss = 9.0275e-02, PNorm = 60.7453, GNorm = 0.6245, lr_0 = 5.0012e-04
Loss = 9.5383e-02, PNorm = 60.7539, GNorm = 0.5918, lr_0 = 4.9978e-04
Loss = 1.0482e-01, PNorm = 60.7655, GNorm = 0.4144, lr_0 = 4.9944e-04
Loss = 8.0331e-02, PNorm = 60.7842, GNorm = 0.5250, lr_0 = 4.9910e-04
Loss = 9.8355e-02, PNorm = 60.8016, GNorm = 0.7323, lr_0 = 4.9875e-04
Loss = 8.6795e-02, PNorm = 60.8102, GNorm = 0.5467, lr_0 = 4.9841e-04
Loss = 9.4145e-02, PNorm = 60.8259, GNorm = 0.5280, lr_0 = 4.9807e-04
Loss = 7.9859e-02, PNorm = 60.8417, GNorm = 0.7600, lr_0 = 4.9773e-04
Loss = 8.1584e-02, PNorm = 60.8545, GNorm = 0.5530, lr_0 = 4.9739e-04
Loss = 8.6494e-02, PNorm = 60.8633, GNorm = 0.5816, lr_0 = 4.9705e-04
Loss = 8.9704e-02, PNorm = 60.8747, GNorm = 0.4267, lr_0 = 4.9671e-04
Loss = 1.0374e-01, PNorm = 60.8908, GNorm = 0.8442, lr_0 = 4.9637e-04
Loss = 8.4184e-02, PNorm = 60.9042, GNorm = 0.7066, lr_0 = 4.9603e-04
Loss = 9.1772e-02, PNorm = 60.9144, GNorm = 0.5921, lr_0 = 4.9569e-04
Loss = 1.0833e-01, PNorm = 60.9239, GNorm = 0.6802, lr_0 = 4.9535e-04
Loss = 9.9517e-02, PNorm = 60.9351, GNorm = 0.7069, lr_0 = 4.9501e-04
Loss = 9.2682e-02, PNorm = 60.9457, GNorm = 1.2669, lr_0 = 4.9467e-04
Loss = 8.8927e-02, PNorm = 60.9529, GNorm = 0.5127, lr_0 = 4.9433e-04
Loss = 9.1841e-02, PNorm = 60.9634, GNorm = 0.6471, lr_0 = 4.9399e-04
Loss = 1.0260e-01, PNorm = 60.9747, GNorm = 0.7414, lr_0 = 4.9365e-04
Loss = 8.6109e-02, PNorm = 60.9870, GNorm = 0.8064, lr_0 = 4.9332e-04
Loss = 1.0221e-01, PNorm = 60.9985, GNorm = 0.6565, lr_0 = 4.9298e-04
Loss = 8.9859e-02, PNorm = 61.0129, GNorm = 0.6237, lr_0 = 4.9264e-04
Loss = 8.9646e-02, PNorm = 61.0281, GNorm = 0.8702, lr_0 = 4.9230e-04
Loss = 8.5880e-02, PNorm = 61.0370, GNorm = 1.4215, lr_0 = 4.9197e-04
Loss = 1.0846e-01, PNorm = 61.0466, GNorm = 0.6449, lr_0 = 4.9163e-04
Loss = 1.0325e-01, PNorm = 61.0613, GNorm = 1.3112, lr_0 = 4.9129e-04
Loss = 9.1480e-02, PNorm = 61.0773, GNorm = 0.6577, lr_0 = 4.9095e-04
Loss = 8.9869e-02, PNorm = 61.0905, GNorm = 0.6703, lr_0 = 4.9062e-04
Loss = 9.1055e-02, PNorm = 61.1013, GNorm = 0.6357, lr_0 = 4.9028e-04
Loss = 8.9348e-02, PNorm = 61.1104, GNorm = 0.7435, lr_0 = 4.8995e-04
Loss = 1.0089e-01, PNorm = 61.1219, GNorm = 0.7137, lr_0 = 4.8961e-04
Loss = 8.7743e-02, PNorm = 61.1330, GNorm = 0.5708, lr_0 = 4.8928e-04
Loss = 9.8748e-02, PNorm = 61.1471, GNorm = 0.6152, lr_0 = 4.8894e-04
Loss = 9.9039e-02, PNorm = 61.1577, GNorm = 0.6305, lr_0 = 4.8861e-04
Loss = 9.3389e-02, PNorm = 61.1676, GNorm = 0.8272, lr_0 = 4.8827e-04
Loss = 1.0411e-01, PNorm = 61.1797, GNorm = 0.8927, lr_0 = 4.8794e-04
Loss = 1.0127e-01, PNorm = 61.1976, GNorm = 1.1559, lr_0 = 4.8760e-04
Loss = 9.7840e-02, PNorm = 61.2114, GNorm = 0.6851, lr_0 = 4.8727e-04
Loss = 1.1925e-01, PNorm = 61.2214, GNorm = 0.8009, lr_0 = 4.8693e-04
Loss = 9.6316e-02, PNorm = 61.2379, GNorm = 0.5827, lr_0 = 4.8660e-04
Loss = 1.0005e-01, PNorm = 61.2550, GNorm = 1.5176, lr_0 = 4.8627e-04
Loss = 9.0601e-02, PNorm = 61.2704, GNorm = 0.6669, lr_0 = 4.8593e-04
Loss = 1.0263e-01, PNorm = 61.2859, GNorm = 0.9775, lr_0 = 4.8560e-04
Loss = 9.3532e-02, PNorm = 61.3025, GNorm = 1.2091, lr_0 = 4.8527e-04
Loss = 1.0699e-01, PNorm = 61.3154, GNorm = 0.6120, lr_0 = 4.8494e-04
Loss = 9.4746e-02, PNorm = 61.3242, GNorm = 0.5407, lr_0 = 4.8460e-04
Loss = 9.5547e-02, PNorm = 61.3351, GNorm = 0.6439, lr_0 = 4.8427e-04
Loss = 9.5107e-02, PNorm = 61.3483, GNorm = 0.4759, lr_0 = 4.8394e-04
Loss = 9.3615e-02, PNorm = 61.3629, GNorm = 0.9604, lr_0 = 4.8361e-04
Loss = 9.0741e-02, PNorm = 61.3806, GNorm = 1.0171, lr_0 = 4.8328e-04
Loss = 9.3854e-02, PNorm = 61.3937, GNorm = 0.9401, lr_0 = 4.8295e-04
Loss = 7.7082e-02, PNorm = 61.4044, GNorm = 0.6406, lr_0 = 4.8262e-04
Loss = 1.0152e-01, PNorm = 61.4142, GNorm = 1.2618, lr_0 = 4.8228e-04
Loss = 1.0296e-01, PNorm = 61.4284, GNorm = 0.9825, lr_0 = 4.8195e-04
Loss = 1.0207e-01, PNorm = 61.4428, GNorm = 0.4853, lr_0 = 4.8162e-04
Loss = 8.1687e-02, PNorm = 61.4565, GNorm = 0.5179, lr_0 = 4.8129e-04
Loss = 9.2674e-02, PNorm = 61.4685, GNorm = 0.4464, lr_0 = 4.8096e-04
Loss = 1.0318e-01, PNorm = 61.4805, GNorm = 0.5571, lr_0 = 4.8064e-04
Loss = 8.8500e-02, PNorm = 61.4933, GNorm = 0.6054, lr_0 = 4.8031e-04
Loss = 1.0008e-01, PNorm = 61.5038, GNorm = 0.6242, lr_0 = 4.7998e-04
Loss = 7.9667e-02, PNorm = 61.5138, GNorm = 0.4788, lr_0 = 4.7965e-04
Loss = 1.0143e-01, PNorm = 61.5216, GNorm = 0.4738, lr_0 = 4.7932e-04
Loss = 8.7276e-02, PNorm = 61.5298, GNorm = 0.8902, lr_0 = 4.7899e-04
Loss = 9.4761e-02, PNorm = 61.5402, GNorm = 0.6204, lr_0 = 4.7866e-04
Loss = 9.5785e-02, PNorm = 61.5507, GNorm = 0.6740, lr_0 = 4.7833e-04
Loss = 9.8277e-02, PNorm = 61.5619, GNorm = 0.7077, lr_0 = 4.7801e-04
Loss = 9.9161e-02, PNorm = 61.5746, GNorm = 0.6343, lr_0 = 4.7768e-04
Loss = 9.4885e-02, PNorm = 61.5882, GNorm = 0.5808, lr_0 = 4.7735e-04
Loss = 9.1873e-02, PNorm = 61.5988, GNorm = 0.5978, lr_0 = 4.7703e-04
Validation mae = 0.413063
Epoch 11
Loss = 8.4581e-02, PNorm = 61.6149, GNorm = 0.4408, lr_0 = 4.7670e-04
Loss = 8.1923e-02, PNorm = 61.6323, GNorm = 0.5813, lr_0 = 4.7637e-04
Loss = 9.3194e-02, PNorm = 61.6465, GNorm = 0.7131, lr_0 = 4.7605e-04
Loss = 7.2241e-02, PNorm = 61.6586, GNorm = 0.9644, lr_0 = 4.7572e-04
Loss = 8.9167e-02, PNorm = 61.6671, GNorm = 1.1008, lr_0 = 4.7539e-04
Loss = 8.6281e-02, PNorm = 61.6813, GNorm = 0.5993, lr_0 = 4.7507e-04
Loss = 8.9216e-02, PNorm = 61.6929, GNorm = 0.5053, lr_0 = 4.7474e-04
Loss = 7.3539e-02, PNorm = 61.7050, GNorm = 0.7130, lr_0 = 4.7442e-04
Loss = 9.7572e-02, PNorm = 61.7224, GNorm = 0.7592, lr_0 = 4.7409e-04
Loss = 8.9168e-02, PNorm = 61.7330, GNorm = 0.9916, lr_0 = 4.7377e-04
Loss = 8.7437e-02, PNorm = 61.7413, GNorm = 1.1491, lr_0 = 4.7344e-04
Loss = 7.6276e-02, PNorm = 61.7516, GNorm = 0.5378, lr_0 = 4.7312e-04
Loss = 7.7455e-02, PNorm = 61.7607, GNorm = 0.7634, lr_0 = 4.7279e-04
Loss = 9.1020e-02, PNorm = 61.7710, GNorm = 0.9407, lr_0 = 4.7247e-04
Loss = 9.5203e-02, PNorm = 61.7803, GNorm = 0.8179, lr_0 = 4.7215e-04
Loss = 9.6596e-02, PNorm = 61.7927, GNorm = 1.0429, lr_0 = 4.7182e-04
Loss = 8.8625e-02, PNorm = 61.8064, GNorm = 0.5157, lr_0 = 4.7150e-04
Loss = 9.3859e-02, PNorm = 61.8175, GNorm = 0.6754, lr_0 = 4.7118e-04
Loss = 9.4219e-02, PNorm = 61.8327, GNorm = 0.6216, lr_0 = 4.7085e-04
Loss = 8.9139e-02, PNorm = 61.8487, GNorm = 0.6250, lr_0 = 4.7053e-04
Loss = 9.1653e-02, PNorm = 61.8612, GNorm = 0.7776, lr_0 = 4.7021e-04
Loss = 7.8661e-02, PNorm = 61.8710, GNorm = 0.5639, lr_0 = 4.6989e-04
Loss = 8.5386e-02, PNorm = 61.8844, GNorm = 1.1053, lr_0 = 4.6957e-04
Loss = 7.7093e-02, PNorm = 61.8943, GNorm = 1.2921, lr_0 = 4.6924e-04
Loss = 8.1052e-02, PNorm = 61.9042, GNorm = 0.6575, lr_0 = 4.6892e-04
Loss = 8.7077e-02, PNorm = 61.9135, GNorm = 0.4858, lr_0 = 4.6860e-04
Loss = 7.6534e-02, PNorm = 61.9245, GNorm = 1.0500, lr_0 = 4.6828e-04
Loss = 7.9127e-02, PNorm = 61.9320, GNorm = 0.4972, lr_0 = 4.6796e-04
Loss = 8.4336e-02, PNorm = 61.9401, GNorm = 1.5214, lr_0 = 4.6764e-04
Loss = 1.0729e-01, PNorm = 61.9464, GNorm = 0.5571, lr_0 = 4.6732e-04
Loss = 8.4626e-02, PNorm = 61.9575, GNorm = 0.4970, lr_0 = 4.6700e-04
Loss = 8.4860e-02, PNorm = 61.9691, GNorm = 0.5757, lr_0 = 4.6668e-04
Loss = 9.3444e-02, PNorm = 61.9789, GNorm = 0.9153, lr_0 = 4.6636e-04
Loss = 8.0091e-02, PNorm = 61.9878, GNorm = 0.8972, lr_0 = 4.6604e-04
Loss = 8.2484e-02, PNorm = 61.9989, GNorm = 0.4995, lr_0 = 4.6572e-04
Loss = 8.6452e-02, PNorm = 62.0102, GNorm = 0.6790, lr_0 = 4.6540e-04
Loss = 8.5012e-02, PNorm = 62.0214, GNorm = 0.6508, lr_0 = 4.6508e-04
Loss = 9.3236e-02, PNorm = 62.0360, GNorm = 0.7089, lr_0 = 4.6476e-04
Loss = 8.1195e-02, PNorm = 62.0496, GNorm = 0.5671, lr_0 = 4.6445e-04
Loss = 9.4424e-02, PNorm = 62.0624, GNorm = 0.6171, lr_0 = 4.6413e-04
Loss = 8.3108e-02, PNorm = 62.0767, GNorm = 0.5314, lr_0 = 4.6381e-04
Loss = 9.5488e-02, PNorm = 62.0906, GNorm = 0.8606, lr_0 = 4.6349e-04
Loss = 8.3197e-02, PNorm = 62.0993, GNorm = 0.6007, lr_0 = 4.6317e-04
Loss = 8.7039e-02, PNorm = 62.1098, GNorm = 0.5492, lr_0 = 4.6286e-04
Loss = 8.1333e-02, PNorm = 62.1221, GNorm = 0.6041, lr_0 = 4.6254e-04
Loss = 8.6865e-02, PNorm = 62.1314, GNorm = 0.5638, lr_0 = 4.6222e-04
Loss = 8.6051e-02, PNorm = 62.1439, GNorm = 0.6926, lr_0 = 4.6191e-04
Loss = 7.7680e-02, PNorm = 62.1543, GNorm = 0.6621, lr_0 = 4.6159e-04
Loss = 8.4308e-02, PNorm = 62.1633, GNorm = 0.5955, lr_0 = 4.6127e-04
Loss = 8.3606e-02, PNorm = 62.1694, GNorm = 0.6723, lr_0 = 4.6096e-04
Loss = 9.5863e-02, PNorm = 62.1792, GNorm = 0.6081, lr_0 = 4.6064e-04
Loss = 8.6253e-02, PNorm = 62.1941, GNorm = 0.7922, lr_0 = 4.6033e-04
Loss = 7.6612e-02, PNorm = 62.2097, GNorm = 0.5454, lr_0 = 4.6001e-04
Loss = 8.2041e-02, PNorm = 62.2230, GNorm = 0.6451, lr_0 = 4.5970e-04
Loss = 8.3138e-02, PNorm = 62.2334, GNorm = 0.4937, lr_0 = 4.5938e-04
Loss = 8.6648e-02, PNorm = 62.2458, GNorm = 0.6968, lr_0 = 4.5907e-04
Loss = 9.4301e-02, PNorm = 62.2607, GNorm = 0.6642, lr_0 = 4.5875e-04
Loss = 8.2966e-02, PNorm = 62.2718, GNorm = 0.9450, lr_0 = 4.5844e-04
Loss = 9.3707e-02, PNorm = 62.2811, GNorm = 0.5707, lr_0 = 4.5812e-04
Loss = 1.0345e-01, PNorm = 62.2922, GNorm = 0.8646, lr_0 = 4.5781e-04
Loss = 7.6704e-02, PNorm = 62.3032, GNorm = 0.5011, lr_0 = 4.5750e-04
Loss = 8.1948e-02, PNorm = 62.3086, GNorm = 0.6858, lr_0 = 4.5718e-04
Loss = 9.2342e-02, PNorm = 62.3096, GNorm = 0.5721, lr_0 = 4.5687e-04
Loss = 7.5500e-02, PNorm = 62.3158, GNorm = 0.5382, lr_0 = 4.5656e-04
Loss = 7.6667e-02, PNorm = 62.3241, GNorm = 0.8268, lr_0 = 4.5624e-04
Loss = 8.6585e-02, PNorm = 62.3355, GNorm = 1.0942, lr_0 = 4.5593e-04
Loss = 8.6223e-02, PNorm = 62.3515, GNorm = 0.6839, lr_0 = 4.5562e-04
Loss = 8.9734e-02, PNorm = 62.3639, GNorm = 0.5776, lr_0 = 4.5531e-04
Loss = 8.2116e-02, PNorm = 62.3765, GNorm = 0.9169, lr_0 = 4.5499e-04
Loss = 8.5661e-02, PNorm = 62.3923, GNorm = 0.7846, lr_0 = 4.5468e-04
Loss = 8.7209e-02, PNorm = 62.4020, GNorm = 0.5762, lr_0 = 4.5437e-04
Loss = 8.6321e-02, PNorm = 62.4101, GNorm = 0.4367, lr_0 = 4.5406e-04
Loss = 9.1943e-02, PNorm = 62.4194, GNorm = 1.2849, lr_0 = 4.5375e-04
Loss = 8.6246e-02, PNorm = 62.4318, GNorm = 0.8214, lr_0 = 4.5344e-04
Loss = 1.0039e-01, PNorm = 62.4424, GNorm = 1.0356, lr_0 = 4.5313e-04
Loss = 8.9601e-02, PNorm = 62.4564, GNorm = 0.6357, lr_0 = 4.5282e-04
Loss = 8.3963e-02, PNorm = 62.4662, GNorm = 0.8720, lr_0 = 4.5251e-04
Loss = 1.0702e-01, PNorm = 62.4751, GNorm = 1.5330, lr_0 = 4.5220e-04
Loss = 1.0320e-01, PNorm = 62.4860, GNorm = 0.9326, lr_0 = 4.5189e-04
Loss = 8.6596e-02, PNorm = 62.5001, GNorm = 0.5387, lr_0 = 4.5158e-04
Loss = 7.3311e-02, PNorm = 62.5155, GNorm = 0.7609, lr_0 = 4.5127e-04
Loss = 8.4227e-02, PNorm = 62.5255, GNorm = 0.6772, lr_0 = 4.5096e-04
Loss = 8.3218e-02, PNorm = 62.5316, GNorm = 0.5910, lr_0 = 4.5065e-04
Loss = 8.7236e-02, PNorm = 62.5393, GNorm = 0.7794, lr_0 = 4.5034e-04
Loss = 8.3866e-02, PNorm = 62.5484, GNorm = 0.3890, lr_0 = 4.5003e-04
Loss = 8.9078e-02, PNorm = 62.5617, GNorm = 0.6842, lr_0 = 4.4972e-04
Loss = 8.7354e-02, PNorm = 62.5713, GNorm = 0.6150, lr_0 = 4.4942e-04
Loss = 9.3688e-02, PNorm = 62.5839, GNorm = 0.6525, lr_0 = 4.4911e-04
Loss = 8.8590e-02, PNorm = 62.5938, GNorm = 0.7652, lr_0 = 4.4880e-04
Loss = 7.6881e-02, PNorm = 62.6005, GNorm = 0.7606, lr_0 = 4.4849e-04
Loss = 9.2604e-02, PNorm = 62.6115, GNorm = 0.6169, lr_0 = 4.4819e-04
Loss = 7.4171e-02, PNorm = 62.6230, GNorm = 0.4855, lr_0 = 4.4788e-04
Loss = 8.0795e-02, PNorm = 62.6296, GNorm = 1.0793, lr_0 = 4.4757e-04
Loss = 8.6647e-02, PNorm = 62.6348, GNorm = 1.2235, lr_0 = 4.4727e-04
Loss = 1.0594e-01, PNorm = 62.6442, GNorm = 0.6967, lr_0 = 4.4696e-04
Loss = 8.5379e-02, PNorm = 62.6592, GNorm = 0.5287, lr_0 = 4.4665e-04
Loss = 8.7553e-02, PNorm = 62.6697, GNorm = 0.4535, lr_0 = 4.4635e-04
Loss = 8.4988e-02, PNorm = 62.6824, GNorm = 0.4794, lr_0 = 4.4604e-04
Loss = 8.4408e-02, PNorm = 62.6920, GNorm = 0.8035, lr_0 = 4.4574e-04
Loss = 8.7969e-02, PNorm = 62.7012, GNorm = 0.6806, lr_0 = 4.4543e-04
Loss = 8.3823e-02, PNorm = 62.7085, GNorm = 0.7849, lr_0 = 4.4513e-04
Loss = 8.5074e-02, PNorm = 62.7179, GNorm = 0.5469, lr_0 = 4.4482e-04
Loss = 9.1179e-02, PNorm = 62.7268, GNorm = 1.3592, lr_0 = 4.4452e-04
Loss = 9.3668e-02, PNorm = 62.7372, GNorm = 1.0803, lr_0 = 4.4421e-04
Loss = 7.3001e-02, PNorm = 62.7465, GNorm = 0.5205, lr_0 = 4.4391e-04
Loss = 8.1987e-02, PNorm = 62.7532, GNorm = 0.8690, lr_0 = 4.4360e-04
Loss = 8.6918e-02, PNorm = 62.7623, GNorm = 0.8829, lr_0 = 4.4330e-04
Loss = 8.5209e-02, PNorm = 62.7754, GNorm = 0.8504, lr_0 = 4.4299e-04
Loss = 9.0885e-02, PNorm = 62.7876, GNorm = 1.3919, lr_0 = 4.4269e-04
Loss = 8.5131e-02, PNorm = 62.8000, GNorm = 1.0292, lr_0 = 4.4239e-04
Loss = 9.2746e-02, PNorm = 62.8103, GNorm = 0.4228, lr_0 = 4.4209e-04
Loss = 9.5522e-02, PNorm = 62.8215, GNorm = 1.5426, lr_0 = 4.4178e-04
Loss = 7.6321e-02, PNorm = 62.8306, GNorm = 0.4951, lr_0 = 4.4148e-04
Loss = 8.4307e-02, PNorm = 62.8373, GNorm = 0.7749, lr_0 = 4.4118e-04
Loss = 7.7850e-02, PNorm = 62.8458, GNorm = 0.5798, lr_0 = 4.4088e-04
Loss = 8.4218e-02, PNorm = 62.8552, GNorm = 0.6377, lr_0 = 4.4057e-04
Loss = 8.7890e-02, PNorm = 62.8650, GNorm = 0.8287, lr_0 = 4.4027e-04
Loss = 8.7255e-02, PNorm = 62.8761, GNorm = 0.7164, lr_0 = 4.3997e-04
Loss = 9.6533e-02, PNorm = 62.8881, GNorm = 0.9784, lr_0 = 4.3967e-04
Loss = 8.2742e-02, PNorm = 62.8986, GNorm = 0.5048, lr_0 = 4.3937e-04
Validation mae = 0.394166
Epoch 12
Loss = 6.5416e-02, PNorm = 62.9108, GNorm = 0.5441, lr_0 = 4.3907e-04
Loss = 7.8677e-02, PNorm = 62.9215, GNorm = 1.1463, lr_0 = 4.3877e-04
Loss = 8.6105e-02, PNorm = 62.9332, GNorm = 0.5406, lr_0 = 4.3846e-04
Loss = 7.7774e-02, PNorm = 62.9438, GNorm = 0.9537, lr_0 = 4.3816e-04
Loss = 8.2131e-02, PNorm = 62.9537, GNorm = 0.8357, lr_0 = 4.3786e-04
Loss = 6.5508e-02, PNorm = 62.9614, GNorm = 0.5859, lr_0 = 4.3756e-04
Loss = 7.6283e-02, PNorm = 62.9707, GNorm = 0.5419, lr_0 = 4.3726e-04
Loss = 5.7583e-02, PNorm = 62.9782, GNorm = 0.6223, lr_0 = 4.3696e-04
Loss = 6.5731e-02, PNorm = 62.9817, GNorm = 0.4994, lr_0 = 4.3667e-04
Loss = 7.3441e-02, PNorm = 62.9888, GNorm = 0.7521, lr_0 = 4.3637e-04
Loss = 7.1049e-02, PNorm = 63.0013, GNorm = 0.8515, lr_0 = 4.3607e-04
Loss = 8.9164e-02, PNorm = 63.0124, GNorm = 0.5183, lr_0 = 4.3577e-04
Loss = 9.7436e-02, PNorm = 63.0232, GNorm = 0.4282, lr_0 = 4.3547e-04
Loss = 8.2406e-02, PNorm = 63.0320, GNorm = 0.5583, lr_0 = 4.3517e-04
Loss = 8.2337e-02, PNorm = 63.0404, GNorm = 0.9761, lr_0 = 4.3487e-04
Loss = 7.6548e-02, PNorm = 63.0517, GNorm = 0.8276, lr_0 = 4.3458e-04
Loss = 7.5615e-02, PNorm = 63.0619, GNorm = 0.4632, lr_0 = 4.3428e-04
Loss = 7.0232e-02, PNorm = 63.0717, GNorm = 0.6608, lr_0 = 4.3398e-04
Loss = 7.5143e-02, PNorm = 63.0819, GNorm = 0.6108, lr_0 = 4.3368e-04
Loss = 7.2774e-02, PNorm = 63.0926, GNorm = 0.7570, lr_0 = 4.3339e-04
Loss = 7.3501e-02, PNorm = 63.1022, GNorm = 0.4904, lr_0 = 4.3309e-04
Loss = 7.5971e-02, PNorm = 63.1102, GNorm = 0.6666, lr_0 = 4.3279e-04
Loss = 7.1196e-02, PNorm = 63.1213, GNorm = 0.7069, lr_0 = 4.3250e-04
Loss = 7.1672e-02, PNorm = 63.1359, GNorm = 0.3413, lr_0 = 4.3220e-04
Loss = 7.5940e-02, PNorm = 63.1453, GNorm = 0.5497, lr_0 = 4.3190e-04
Loss = 8.4002e-02, PNorm = 63.1572, GNorm = 1.1434, lr_0 = 4.3161e-04
Loss = 7.9002e-02, PNorm = 63.1696, GNorm = 0.9584, lr_0 = 4.3131e-04
Loss = 8.0874e-02, PNorm = 63.1782, GNorm = 0.7494, lr_0 = 4.3102e-04
Loss = 7.8916e-02, PNorm = 63.1885, GNorm = 0.8449, lr_0 = 4.3072e-04
Loss = 8.7458e-02, PNorm = 63.1973, GNorm = 0.7582, lr_0 = 4.3043e-04
Loss = 7.1456e-02, PNorm = 63.2087, GNorm = 0.5991, lr_0 = 4.3013e-04
Loss = 8.6655e-02, PNorm = 63.2197, GNorm = 0.6394, lr_0 = 4.2984e-04
Loss = 7.3834e-02, PNorm = 63.2230, GNorm = 0.4694, lr_0 = 4.2954e-04
Loss = 7.2466e-02, PNorm = 63.2297, GNorm = 0.4718, lr_0 = 4.2925e-04
Loss = 8.8927e-02, PNorm = 63.2424, GNorm = 0.5121, lr_0 = 4.2895e-04
Loss = 8.8177e-02, PNorm = 63.2525, GNorm = 0.8574, lr_0 = 4.2866e-04
Loss = 7.6530e-02, PNorm = 63.2623, GNorm = 0.5664, lr_0 = 4.2837e-04
Loss = 8.4847e-02, PNorm = 63.2691, GNorm = 0.5210, lr_0 = 4.2807e-04
Loss = 8.1697e-02, PNorm = 63.2776, GNorm = 0.5922, lr_0 = 4.2778e-04
Loss = 6.7723e-02, PNorm = 63.2871, GNorm = 0.6786, lr_0 = 4.2749e-04
Loss = 8.0064e-02, PNorm = 63.2986, GNorm = 0.4608, lr_0 = 4.2719e-04
Loss = 8.0379e-02, PNorm = 63.3097, GNorm = 1.5831, lr_0 = 4.2690e-04
Loss = 1.0580e-01, PNorm = 63.3269, GNorm = 0.8095, lr_0 = 4.2661e-04
Loss = 8.9921e-02, PNorm = 63.3414, GNorm = 0.9926, lr_0 = 4.2632e-04
Loss = 8.0041e-02, PNorm = 63.3517, GNorm = 0.6558, lr_0 = 4.2602e-04
Loss = 8.9233e-02, PNorm = 63.3600, GNorm = 0.5706, lr_0 = 4.2573e-04
Loss = 7.8510e-02, PNorm = 63.3700, GNorm = 0.8500, lr_0 = 4.2544e-04
Loss = 8.2733e-02, PNorm = 63.3820, GNorm = 0.4341, lr_0 = 4.2515e-04
Loss = 7.2249e-02, PNorm = 63.3935, GNorm = 0.7593, lr_0 = 4.2486e-04
Loss = 8.0909e-02, PNorm = 63.4026, GNorm = 0.5597, lr_0 = 4.2457e-04
Loss = 8.2298e-02, PNorm = 63.4174, GNorm = 0.6070, lr_0 = 4.2428e-04
Loss = 7.2802e-02, PNorm = 63.4291, GNorm = 0.6082, lr_0 = 4.2399e-04
Loss = 9.0223e-02, PNorm = 63.4392, GNorm = 1.3662, lr_0 = 4.2370e-04
Loss = 7.9415e-02, PNorm = 63.4516, GNorm = 0.5084, lr_0 = 4.2340e-04
Loss = 9.3117e-02, PNorm = 63.4688, GNorm = 1.0896, lr_0 = 4.2311e-04
Loss = 6.4993e-02, PNorm = 63.4812, GNorm = 0.4205, lr_0 = 4.2283e-04
Loss = 8.6645e-02, PNorm = 63.4925, GNorm = 0.5478, lr_0 = 4.2254e-04
Loss = 7.9391e-02, PNorm = 63.5025, GNorm = 0.7452, lr_0 = 4.2225e-04
Loss = 6.9608e-02, PNorm = 63.5084, GNorm = 0.7139, lr_0 = 4.2196e-04
Loss = 7.7954e-02, PNorm = 63.5184, GNorm = 0.7640, lr_0 = 4.2167e-04
Loss = 8.3338e-02, PNorm = 63.5291, GNorm = 0.6595, lr_0 = 4.2138e-04
Loss = 8.3646e-02, PNorm = 63.5410, GNorm = 0.8667, lr_0 = 4.2109e-04
Loss = 9.2680e-02, PNorm = 63.5531, GNorm = 1.2058, lr_0 = 4.2080e-04
Loss = 8.9426e-02, PNorm = 63.5628, GNorm = 0.6974, lr_0 = 4.2051e-04
Loss = 7.7952e-02, PNorm = 63.5703, GNorm = 0.6099, lr_0 = 4.2023e-04
Loss = 7.3591e-02, PNorm = 63.5806, GNorm = 0.7130, lr_0 = 4.1994e-04
Loss = 7.0802e-02, PNorm = 63.5887, GNorm = 0.5956, lr_0 = 4.1965e-04
Loss = 7.2186e-02, PNorm = 63.5986, GNorm = 0.6362, lr_0 = 4.1936e-04
Loss = 9.0165e-02, PNorm = 63.6072, GNorm = 0.6267, lr_0 = 4.1907e-04
Loss = 8.0698e-02, PNorm = 63.6175, GNorm = 0.6311, lr_0 = 4.1879e-04
Loss = 7.5172e-02, PNorm = 63.6256, GNorm = 1.5306, lr_0 = 4.1850e-04
Loss = 8.9827e-02, PNorm = 63.6363, GNorm = 0.9151, lr_0 = 4.1821e-04
Loss = 9.9443e-02, PNorm = 63.6504, GNorm = 0.8357, lr_0 = 4.1793e-04
Loss = 8.7254e-02, PNorm = 63.6616, GNorm = 0.6148, lr_0 = 4.1764e-04
Loss = 8.3740e-02, PNorm = 63.6719, GNorm = 0.7145, lr_0 = 4.1736e-04
Loss = 7.5442e-02, PNorm = 63.6817, GNorm = 0.5738, lr_0 = 4.1707e-04
Loss = 8.1977e-02, PNorm = 63.6907, GNorm = 0.4200, lr_0 = 4.1678e-04
Loss = 6.8736e-02, PNorm = 63.6990, GNorm = 0.6098, lr_0 = 4.1650e-04
Loss = 8.8476e-02, PNorm = 63.7079, GNorm = 0.6242, lr_0 = 4.1621e-04
Loss = 7.7056e-02, PNorm = 63.7158, GNorm = 0.7844, lr_0 = 4.1593e-04
Loss = 7.0664e-02, PNorm = 63.7240, GNorm = 0.5025, lr_0 = 4.1564e-04
Loss = 8.7769e-02, PNorm = 63.7304, GNorm = 0.6483, lr_0 = 4.1536e-04
Loss = 8.0554e-02, PNorm = 63.7389, GNorm = 1.1620, lr_0 = 4.1507e-04
Loss = 1.0060e-01, PNorm = 63.7515, GNorm = 0.7272, lr_0 = 4.1479e-04
Loss = 6.5341e-02, PNorm = 63.7601, GNorm = 0.4469, lr_0 = 4.1450e-04
Loss = 9.7166e-02, PNorm = 63.7689, GNorm = 0.6545, lr_0 = 4.1422e-04
Loss = 7.9835e-02, PNorm = 63.7781, GNorm = 0.4791, lr_0 = 4.1394e-04
Loss = 9.4365e-02, PNorm = 63.7868, GNorm = 0.9555, lr_0 = 4.1365e-04
Loss = 7.7405e-02, PNorm = 63.7972, GNorm = 0.5180, lr_0 = 4.1337e-04
Loss = 9.0691e-02, PNorm = 63.8073, GNorm = 0.8322, lr_0 = 4.1309e-04
Loss = 8.0988e-02, PNorm = 63.8163, GNorm = 0.9907, lr_0 = 4.1280e-04
Loss = 6.2503e-02, PNorm = 63.8213, GNorm = 0.6899, lr_0 = 4.1252e-04
Loss = 8.1294e-02, PNorm = 63.8297, GNorm = 0.5245, lr_0 = 4.1224e-04
Loss = 9.4572e-02, PNorm = 63.8401, GNorm = 0.6141, lr_0 = 4.1196e-04
Loss = 8.8938e-02, PNorm = 63.8523, GNorm = 1.5179, lr_0 = 4.1167e-04
Loss = 7.9261e-02, PNorm = 63.8613, GNorm = 0.6171, lr_0 = 4.1139e-04
Loss = 7.6027e-02, PNorm = 63.8689, GNorm = 0.4970, lr_0 = 4.1111e-04
Loss = 8.5924e-02, PNorm = 63.8765, GNorm = 0.5882, lr_0 = 4.1083e-04
Loss = 9.3000e-02, PNorm = 63.8839, GNorm = 1.1091, lr_0 = 4.1055e-04
Loss = 9.5774e-02, PNorm = 63.9008, GNorm = 0.6517, lr_0 = 4.1027e-04
Loss = 8.4048e-02, PNorm = 63.9122, GNorm = 0.8337, lr_0 = 4.0998e-04
Loss = 7.5619e-02, PNorm = 63.9224, GNorm = 0.6810, lr_0 = 4.0970e-04
Loss = 9.0149e-02, PNorm = 63.9290, GNorm = 0.6809, lr_0 = 4.0942e-04
Loss = 8.0109e-02, PNorm = 63.9373, GNorm = 0.4722, lr_0 = 4.0914e-04
Loss = 7.7482e-02, PNorm = 63.9458, GNorm = 0.7425, lr_0 = 4.0886e-04
Loss = 7.8728e-02, PNorm = 63.9506, GNorm = 0.5971, lr_0 = 4.0858e-04
Loss = 8.4854e-02, PNorm = 63.9632, GNorm = 0.4678, lr_0 = 4.0830e-04
Loss = 7.9885e-02, PNorm = 63.9736, GNorm = 0.6962, lr_0 = 4.0802e-04
Loss = 7.0426e-02, PNorm = 63.9814, GNorm = 0.6160, lr_0 = 4.0774e-04
Loss = 7.3034e-02, PNorm = 63.9923, GNorm = 0.7648, lr_0 = 4.0746e-04
Loss = 7.3862e-02, PNorm = 63.9998, GNorm = 0.5432, lr_0 = 4.0718e-04
Loss = 8.4296e-02, PNorm = 64.0071, GNorm = 0.8834, lr_0 = 4.0691e-04
Loss = 7.4071e-02, PNorm = 64.0123, GNorm = 0.8621, lr_0 = 4.0663e-04
Loss = 8.6136e-02, PNorm = 64.0193, GNorm = 0.7853, lr_0 = 4.0635e-04
Loss = 8.0207e-02, PNorm = 64.0265, GNorm = 0.7486, lr_0 = 4.0607e-04
Loss = 7.8984e-02, PNorm = 64.0347, GNorm = 0.8429, lr_0 = 4.0579e-04
Loss = 8.3575e-02, PNorm = 64.0414, GNorm = 0.8172, lr_0 = 4.0551e-04
Loss = 7.8761e-02, PNorm = 64.0522, GNorm = 0.7404, lr_0 = 4.0524e-04
Loss = 8.3699e-02, PNorm = 64.0601, GNorm = 0.8602, lr_0 = 4.0496e-04
Loss = 7.9869e-02, PNorm = 64.0656, GNorm = 0.9591, lr_0 = 4.0468e-04
Validation mae = 0.414378
Epoch 13
Loss = 7.1257e-02, PNorm = 64.0760, GNorm = 1.1599, lr_0 = 4.0440e-04
Loss = 7.9695e-02, PNorm = 64.0898, GNorm = 0.4656, lr_0 = 4.0413e-04
Loss = 6.2182e-02, PNorm = 64.1022, GNorm = 0.6700, lr_0 = 4.0385e-04
Loss = 6.8184e-02, PNorm = 64.1099, GNorm = 0.5102, lr_0 = 4.0357e-04
Loss = 6.3886e-02, PNorm = 64.1147, GNorm = 0.6775, lr_0 = 4.0330e-04
Loss = 7.2416e-02, PNorm = 64.1244, GNorm = 0.7958, lr_0 = 4.0302e-04
Loss = 8.1235e-02, PNorm = 64.1358, GNorm = 0.7116, lr_0 = 4.0274e-04
Loss = 5.2135e-02, PNorm = 64.1466, GNorm = 0.3996, lr_0 = 4.0247e-04
Loss = 6.5207e-02, PNorm = 64.1529, GNorm = 0.4875, lr_0 = 4.0219e-04
Loss = 7.7800e-02, PNorm = 64.1617, GNorm = 0.5414, lr_0 = 4.0192e-04
Loss = 7.2541e-02, PNorm = 64.1715, GNorm = 0.5296, lr_0 = 4.0164e-04
Loss = 7.5286e-02, PNorm = 64.1798, GNorm = 0.7460, lr_0 = 4.0137e-04
Loss = 8.2096e-02, PNorm = 64.1908, GNorm = 0.8195, lr_0 = 4.0109e-04
Loss = 6.9943e-02, PNorm = 64.2019, GNorm = 1.0866, lr_0 = 4.0082e-04
Loss = 6.7120e-02, PNorm = 64.2127, GNorm = 0.6651, lr_0 = 4.0054e-04
Loss = 7.3870e-02, PNorm = 64.2222, GNorm = 0.4780, lr_0 = 4.0027e-04
Loss = 5.6895e-02, PNorm = 64.2308, GNorm = 1.2834, lr_0 = 3.9999e-04
Loss = 7.5883e-02, PNorm = 64.2378, GNorm = 0.5363, lr_0 = 3.9972e-04
Loss = 7.3426e-02, PNorm = 64.2473, GNorm = 0.4406, lr_0 = 3.9945e-04
Loss = 6.8778e-02, PNorm = 64.2570, GNorm = 0.8361, lr_0 = 3.9917e-04
Loss = 6.6123e-02, PNorm = 64.2666, GNorm = 0.7954, lr_0 = 3.9890e-04
Loss = 7.4106e-02, PNorm = 64.2744, GNorm = 0.7810, lr_0 = 3.9863e-04
Loss = 7.4344e-02, PNorm = 64.2845, GNorm = 0.6716, lr_0 = 3.9835e-04
Loss = 7.1747e-02, PNorm = 64.2952, GNorm = 0.7209, lr_0 = 3.9808e-04
Loss = 7.0502e-02, PNorm = 64.3033, GNorm = 0.6712, lr_0 = 3.9781e-04
Loss = 6.9940e-02, PNorm = 64.3125, GNorm = 0.8463, lr_0 = 3.9753e-04
Loss = 7.0759e-02, PNorm = 64.3215, GNorm = 0.4382, lr_0 = 3.9726e-04
Loss = 7.8535e-02, PNorm = 64.3318, GNorm = 0.7645, lr_0 = 3.9699e-04
Loss = 7.9074e-02, PNorm = 64.3406, GNorm = 0.7170, lr_0 = 3.9672e-04
Loss = 7.2640e-02, PNorm = 64.3469, GNorm = 0.6505, lr_0 = 3.9645e-04
Loss = 6.7107e-02, PNorm = 64.3574, GNorm = 0.8782, lr_0 = 3.9617e-04
Loss = 7.3628e-02, PNorm = 64.3676, GNorm = 0.3958, lr_0 = 3.9590e-04
Loss = 6.5399e-02, PNorm = 64.3742, GNorm = 0.4708, lr_0 = 3.9563e-04
Loss = 7.0567e-02, PNorm = 64.3800, GNorm = 0.4619, lr_0 = 3.9536e-04
Loss = 7.5833e-02, PNorm = 64.3865, GNorm = 0.7206, lr_0 = 3.9509e-04
Loss = 7.1685e-02, PNorm = 64.3934, GNorm = 0.5526, lr_0 = 3.9482e-04
Loss = 8.1331e-02, PNorm = 64.4008, GNorm = 0.5288, lr_0 = 3.9455e-04
Loss = 7.8155e-02, PNorm = 64.4086, GNorm = 1.0654, lr_0 = 3.9428e-04
Loss = 7.8269e-02, PNorm = 64.4177, GNorm = 0.8592, lr_0 = 3.9401e-04
Loss = 7.5156e-02, PNorm = 64.4295, GNorm = 1.1313, lr_0 = 3.9374e-04
Loss = 7.4276e-02, PNorm = 64.4391, GNorm = 0.6454, lr_0 = 3.9347e-04
Loss = 7.7095e-02, PNorm = 64.4515, GNorm = 0.6512, lr_0 = 3.9320e-04
Loss = 7.0028e-02, PNorm = 64.4622, GNorm = 0.5996, lr_0 = 3.9293e-04
Loss = 7.5902e-02, PNorm = 64.4701, GNorm = 0.5761, lr_0 = 3.9266e-04
Loss = 8.5634e-02, PNorm = 64.4782, GNorm = 0.6788, lr_0 = 3.9239e-04
Loss = 6.1216e-02, PNorm = 64.4879, GNorm = 0.5899, lr_0 = 3.9212e-04
Loss = 7.1777e-02, PNorm = 64.4954, GNorm = 0.4738, lr_0 = 3.9185e-04
Loss = 6.5860e-02, PNorm = 64.5043, GNorm = 0.5208, lr_0 = 3.9159e-04
Loss = 7.9188e-02, PNorm = 64.5143, GNorm = 0.9079, lr_0 = 3.9132e-04
Loss = 7.0366e-02, PNorm = 64.5230, GNorm = 0.6971, lr_0 = 3.9105e-04
Loss = 7.4704e-02, PNorm = 64.5309, GNorm = 0.6503, lr_0 = 3.9078e-04
Loss = 6.9637e-02, PNorm = 64.5400, GNorm = 0.8347, lr_0 = 3.9051e-04
Loss = 7.3790e-02, PNorm = 64.5506, GNorm = 0.5961, lr_0 = 3.9025e-04
Loss = 7.5233e-02, PNorm = 64.5620, GNorm = 0.9149, lr_0 = 3.8998e-04
Loss = 7.7769e-02, PNorm = 64.5706, GNorm = 0.6070, lr_0 = 3.8971e-04
Loss = 7.5425e-02, PNorm = 64.5813, GNorm = 0.6733, lr_0 = 3.8945e-04
Loss = 7.8613e-02, PNorm = 64.5952, GNorm = 0.5551, lr_0 = 3.8918e-04
Loss = 7.3941e-02, PNorm = 64.6047, GNorm = 0.5973, lr_0 = 3.8891e-04
Loss = 7.2875e-02, PNorm = 64.6117, GNorm = 0.7069, lr_0 = 3.8865e-04
Loss = 7.3286e-02, PNorm = 64.6178, GNorm = 0.6342, lr_0 = 3.8838e-04
Loss = 7.0746e-02, PNorm = 64.6247, GNorm = 0.5326, lr_0 = 3.8811e-04
Loss = 6.6308e-02, PNorm = 64.6328, GNorm = 0.5786, lr_0 = 3.8785e-04
Loss = 5.6102e-02, PNorm = 64.6389, GNorm = 0.9667, lr_0 = 3.8758e-04
Loss = 7.4054e-02, PNorm = 64.6464, GNorm = 0.5006, lr_0 = 3.8732e-04
Loss = 7.3967e-02, PNorm = 64.6530, GNorm = 0.6888, lr_0 = 3.8705e-04
Loss = 8.3551e-02, PNorm = 64.6638, GNorm = 0.6043, lr_0 = 3.8679e-04
Loss = 7.0460e-02, PNorm = 64.6748, GNorm = 0.8513, lr_0 = 3.8652e-04
Loss = 7.3215e-02, PNorm = 64.6819, GNorm = 0.6728, lr_0 = 3.8626e-04
Loss = 8.3943e-02, PNorm = 64.6898, GNorm = 0.8802, lr_0 = 3.8599e-04
Loss = 7.7485e-02, PNorm = 64.6960, GNorm = 0.7130, lr_0 = 3.8573e-04
Loss = 7.0005e-02, PNorm = 64.7035, GNorm = 0.4981, lr_0 = 3.8546e-04
Loss = 8.0852e-02, PNorm = 64.7093, GNorm = 0.8801, lr_0 = 3.8520e-04
Loss = 6.8714e-02, PNorm = 64.7145, GNorm = 0.7407, lr_0 = 3.8493e-04
Loss = 6.3735e-02, PNorm = 64.7203, GNorm = 0.5635, lr_0 = 3.8467e-04
Loss = 8.0672e-02, PNorm = 64.7270, GNorm = 0.8608, lr_0 = 3.8441e-04
Loss = 7.2472e-02, PNorm = 64.7406, GNorm = 0.6286, lr_0 = 3.8414e-04
Loss = 6.8488e-02, PNorm = 64.7515, GNorm = 0.6070, lr_0 = 3.8388e-04
Loss = 7.8861e-02, PNorm = 64.7586, GNorm = 1.0379, lr_0 = 3.8362e-04
Loss = 7.3860e-02, PNorm = 64.7680, GNorm = 0.6903, lr_0 = 3.8336e-04
Loss = 7.2381e-02, PNorm = 64.7751, GNorm = 0.8181, lr_0 = 3.8309e-04
Loss = 7.4242e-02, PNorm = 64.7809, GNorm = 0.5868, lr_0 = 3.8283e-04
Loss = 8.8973e-02, PNorm = 64.7877, GNorm = 0.6523, lr_0 = 3.8257e-04
Loss = 7.2985e-02, PNorm = 64.7962, GNorm = 0.7308, lr_0 = 3.8231e-04
Loss = 7.5333e-02, PNorm = 64.8049, GNorm = 1.1296, lr_0 = 3.8204e-04
Loss = 7.5315e-02, PNorm = 64.8147, GNorm = 0.8549, lr_0 = 3.8178e-04
Loss = 8.0058e-02, PNorm = 64.8237, GNorm = 0.6667, lr_0 = 3.8152e-04
Loss = 7.9517e-02, PNorm = 64.8354, GNorm = 0.7387, lr_0 = 3.8126e-04
Loss = 7.8375e-02, PNorm = 64.8460, GNorm = 1.1896, lr_0 = 3.8100e-04
Loss = 8.6651e-02, PNorm = 64.8600, GNorm = 0.9735, lr_0 = 3.8074e-04
Loss = 8.3302e-02, PNorm = 64.8738, GNorm = 0.5737, lr_0 = 3.8048e-04
Loss = 6.2561e-02, PNorm = 64.8850, GNorm = 0.6602, lr_0 = 3.8022e-04
Loss = 8.2562e-02, PNorm = 64.8915, GNorm = 0.6602, lr_0 = 3.7995e-04
Loss = 7.9906e-02, PNorm = 64.8947, GNorm = 0.4805, lr_0 = 3.7969e-04
Loss = 8.3396e-02, PNorm = 64.8968, GNorm = 0.7416, lr_0 = 3.7943e-04
Loss = 7.6467e-02, PNorm = 64.9038, GNorm = 0.5344, lr_0 = 3.7917e-04
Loss = 7.6937e-02, PNorm = 64.9130, GNorm = 0.6834, lr_0 = 3.7891e-04
Loss = 8.5047e-02, PNorm = 64.9216, GNorm = 0.9439, lr_0 = 3.7866e-04
Loss = 8.7713e-02, PNorm = 64.9295, GNorm = 0.6435, lr_0 = 3.7840e-04
Loss = 7.9744e-02, PNorm = 64.9394, GNorm = 0.6815, lr_0 = 3.7814e-04
Loss = 7.2290e-02, PNorm = 64.9495, GNorm = 0.4660, lr_0 = 3.7788e-04
Loss = 7.5146e-02, PNorm = 64.9597, GNorm = 0.8963, lr_0 = 3.7762e-04
Loss = 7.9468e-02, PNorm = 64.9658, GNorm = 0.6235, lr_0 = 3.7736e-04
Loss = 6.8548e-02, PNorm = 64.9731, GNorm = 1.1660, lr_0 = 3.7710e-04
Loss = 8.6740e-02, PNorm = 64.9778, GNorm = 0.8991, lr_0 = 3.7684e-04
Loss = 8.3118e-02, PNorm = 64.9828, GNorm = 0.5524, lr_0 = 3.7659e-04
Loss = 7.7750e-02, PNorm = 64.9925, GNorm = 0.6036, lr_0 = 3.7633e-04
Loss = 7.7256e-02, PNorm = 65.0015, GNorm = 0.5738, lr_0 = 3.7607e-04
Loss = 8.2387e-02, PNorm = 65.0115, GNorm = 0.5615, lr_0 = 3.7581e-04
Loss = 8.1625e-02, PNorm = 65.0203, GNorm = 0.7098, lr_0 = 3.7555e-04
Loss = 7.6099e-02, PNorm = 65.0300, GNorm = 0.7206, lr_0 = 3.7530e-04
Loss = 6.7928e-02, PNorm = 65.0418, GNorm = 0.5422, lr_0 = 3.7504e-04
Loss = 8.5042e-02, PNorm = 65.0537, GNorm = 0.6651, lr_0 = 3.7478e-04
Loss = 8.0406e-02, PNorm = 65.0589, GNorm = 0.4726, lr_0 = 3.7453e-04
Loss = 9.1185e-02, PNorm = 65.0642, GNorm = 1.4398, lr_0 = 3.7427e-04
Loss = 8.5030e-02, PNorm = 65.0729, GNorm = 0.9686, lr_0 = 3.7401e-04
Loss = 9.2152e-02, PNorm = 65.0834, GNorm = 0.7326, lr_0 = 3.7376e-04
Loss = 7.7790e-02, PNorm = 65.0912, GNorm = 0.4262, lr_0 = 3.7350e-04
Loss = 7.5028e-02, PNorm = 65.1008, GNorm = 0.5040, lr_0 = 3.7325e-04
Loss = 8.4317e-02, PNorm = 65.1088, GNorm = 0.4983, lr_0 = 3.7299e-04
Loss = 6.7868e-02, PNorm = 65.1169, GNorm = 0.4484, lr_0 = 3.7273e-04
Validation mae = 0.394121
Epoch 14
Loss = 8.0593e-02, PNorm = 65.1240, GNorm = 0.8913, lr_0 = 3.7248e-04
Loss = 5.7480e-02, PNorm = 65.1310, GNorm = 0.5777, lr_0 = 3.7222e-04
Loss = 6.3967e-02, PNorm = 65.1399, GNorm = 0.7308, lr_0 = 3.7197e-04
Loss = 6.0528e-02, PNorm = 65.1497, GNorm = 0.8004, lr_0 = 3.7171e-04
Loss = 7.6980e-02, PNorm = 65.1596, GNorm = 0.4809, lr_0 = 3.7146e-04
Loss = 5.4114e-02, PNorm = 65.1684, GNorm = 0.5995, lr_0 = 3.7120e-04
Loss = 6.9243e-02, PNorm = 65.1764, GNorm = 0.9708, lr_0 = 3.7095e-04
Loss = 6.0971e-02, PNorm = 65.1843, GNorm = 0.4805, lr_0 = 3.7070e-04
Loss = 6.3333e-02, PNorm = 65.1886, GNorm = 0.3820, lr_0 = 3.7044e-04
Loss = 6.7388e-02, PNorm = 65.1922, GNorm = 0.5172, lr_0 = 3.7019e-04
Loss = 6.8010e-02, PNorm = 65.1992, GNorm = 0.6565, lr_0 = 3.6993e-04
Loss = 6.6791e-02, PNorm = 65.2074, GNorm = 0.5552, lr_0 = 3.6968e-04
Loss = 6.8036e-02, PNorm = 65.2150, GNorm = 0.7754, lr_0 = 3.6943e-04
Loss = 6.5073e-02, PNorm = 65.2211, GNorm = 0.7015, lr_0 = 3.6917e-04
Loss = 6.9445e-02, PNorm = 65.2319, GNorm = 0.4369, lr_0 = 3.6892e-04
Loss = 6.0077e-02, PNorm = 65.2411, GNorm = 0.7469, lr_0 = 3.6867e-04
Loss = 7.0751e-02, PNorm = 65.2467, GNorm = 0.6710, lr_0 = 3.6842e-04
Loss = 6.8432e-02, PNorm = 65.2578, GNorm = 0.5846, lr_0 = 3.6816e-04
Loss = 6.3397e-02, PNorm = 65.2684, GNorm = 0.4352, lr_0 = 3.6791e-04
Loss = 5.8800e-02, PNorm = 65.2751, GNorm = 0.6573, lr_0 = 3.6766e-04
Loss = 6.8808e-02, PNorm = 65.2844, GNorm = 0.4770, lr_0 = 3.6741e-04
Loss = 7.0045e-02, PNorm = 65.2916, GNorm = 0.6001, lr_0 = 3.6716e-04
Loss = 7.3825e-02, PNorm = 65.3028, GNorm = 0.4700, lr_0 = 3.6690e-04
Loss = 7.4476e-02, PNorm = 65.3113, GNorm = 0.4187, lr_0 = 3.6665e-04
Loss = 6.7665e-02, PNorm = 65.3173, GNorm = 1.2418, lr_0 = 3.6640e-04
Loss = 6.9641e-02, PNorm = 65.3249, GNorm = 0.8201, lr_0 = 3.6615e-04
Loss = 6.5250e-02, PNorm = 65.3363, GNorm = 0.4770, lr_0 = 3.6590e-04
Loss = 7.2952e-02, PNorm = 65.3466, GNorm = 0.7152, lr_0 = 3.6565e-04
Loss = 6.3323e-02, PNorm = 65.3553, GNorm = 0.7078, lr_0 = 3.6540e-04
Loss = 6.7463e-02, PNorm = 65.3643, GNorm = 0.5675, lr_0 = 3.6515e-04
Loss = 6.1922e-02, PNorm = 65.3679, GNorm = 0.5961, lr_0 = 3.6490e-04
Loss = 6.3158e-02, PNorm = 65.3747, GNorm = 0.8908, lr_0 = 3.6465e-04
Loss = 6.4970e-02, PNorm = 65.3839, GNorm = 0.6487, lr_0 = 3.6440e-04
Loss = 6.8499e-02, PNorm = 65.3964, GNorm = 0.5222, lr_0 = 3.6415e-04
Loss = 7.1731e-02, PNorm = 65.4046, GNorm = 0.5872, lr_0 = 3.6390e-04
Loss = 7.7832e-02, PNorm = 65.4145, GNorm = 0.7272, lr_0 = 3.6365e-04
Loss = 6.6588e-02, PNorm = 65.4275, GNorm = 0.4515, lr_0 = 3.6340e-04
Loss = 7.2193e-02, PNorm = 65.4367, GNorm = 0.7102, lr_0 = 3.6315e-04
Loss = 6.5467e-02, PNorm = 65.4434, GNorm = 0.5611, lr_0 = 3.6290e-04
Loss = 6.7354e-02, PNorm = 65.4484, GNorm = 0.6233, lr_0 = 3.6266e-04
Loss = 6.6356e-02, PNorm = 65.4531, GNorm = 1.1561, lr_0 = 3.6241e-04
Loss = 6.7903e-02, PNorm = 65.4581, GNorm = 1.1756, lr_0 = 3.6216e-04
Loss = 7.4420e-02, PNorm = 65.4660, GNorm = 1.0503, lr_0 = 3.6191e-04
Loss = 6.9191e-02, PNorm = 65.4718, GNorm = 0.5932, lr_0 = 3.6166e-04
Loss = 7.3217e-02, PNorm = 65.4775, GNorm = 0.8108, lr_0 = 3.6141e-04
Loss = 7.2642e-02, PNorm = 65.4840, GNorm = 1.1024, lr_0 = 3.6117e-04
Loss = 7.5719e-02, PNorm = 65.4866, GNorm = 0.8347, lr_0 = 3.6092e-04
Loss = 7.5196e-02, PNorm = 65.4919, GNorm = 1.3430, lr_0 = 3.6067e-04
Loss = 7.7072e-02, PNorm = 65.5022, GNorm = 0.3715, lr_0 = 3.6043e-04
Loss = 6.8977e-02, PNorm = 65.5105, GNorm = 0.4440, lr_0 = 3.6018e-04
Loss = 7.1070e-02, PNorm = 65.5176, GNorm = 0.4679, lr_0 = 3.5993e-04
Loss = 6.6654e-02, PNorm = 65.5258, GNorm = 0.5903, lr_0 = 3.5969e-04
Loss = 5.7874e-02, PNorm = 65.5348, GNorm = 0.5226, lr_0 = 3.5944e-04
Loss = 6.7032e-02, PNorm = 65.5389, GNorm = 0.4595, lr_0 = 3.5919e-04
Loss = 7.0085e-02, PNorm = 65.5445, GNorm = 1.0596, lr_0 = 3.5895e-04
Loss = 7.8099e-02, PNorm = 65.5518, GNorm = 1.0180, lr_0 = 3.5870e-04
Loss = 6.5293e-02, PNorm = 65.5631, GNorm = 0.4731, lr_0 = 3.5845e-04
Loss = 7.8337e-02, PNorm = 65.5704, GNorm = 1.3808, lr_0 = 3.5821e-04
Loss = 6.4266e-02, PNorm = 65.5815, GNorm = 0.7590, lr_0 = 3.5796e-04
Loss = 7.4239e-02, PNorm = 65.5924, GNorm = 1.0853, lr_0 = 3.5772e-04
Loss = 6.8515e-02, PNorm = 65.5997, GNorm = 0.8863, lr_0 = 3.5747e-04
Loss = 7.9184e-02, PNorm = 65.6061, GNorm = 0.9516, lr_0 = 3.5723e-04
Loss = 7.5233e-02, PNorm = 65.6150, GNorm = 0.9406, lr_0 = 3.5698e-04
Loss = 9.3973e-02, PNorm = 65.6253, GNorm = 0.5614, lr_0 = 3.5674e-04
Loss = 8.5910e-02, PNorm = 65.6373, GNorm = 0.4424, lr_0 = 3.5650e-04
Loss = 8.2157e-02, PNorm = 65.6466, GNorm = 0.5840, lr_0 = 3.5625e-04
Loss = 6.8209e-02, PNorm = 65.6531, GNorm = 0.4587, lr_0 = 3.5601e-04
Loss = 7.5160e-02, PNorm = 65.6588, GNorm = 0.7125, lr_0 = 3.5576e-04
Loss = 7.2038e-02, PNorm = 65.6656, GNorm = 0.5539, lr_0 = 3.5552e-04
Loss = 6.3659e-02, PNorm = 65.6706, GNorm = 0.5783, lr_0 = 3.5528e-04
Loss = 7.6416e-02, PNorm = 65.6746, GNorm = 0.6314, lr_0 = 3.5503e-04
Loss = 7.6924e-02, PNorm = 65.6825, GNorm = 0.6488, lr_0 = 3.5479e-04
Loss = 7.1368e-02, PNorm = 65.6903, GNorm = 0.4472, lr_0 = 3.5455e-04
Loss = 6.9613e-02, PNorm = 65.6995, GNorm = 0.7046, lr_0 = 3.5430e-04
Loss = 7.2064e-02, PNorm = 65.7068, GNorm = 1.3887, lr_0 = 3.5406e-04
Loss = 8.1011e-02, PNorm = 65.7136, GNorm = 1.0636, lr_0 = 3.5382e-04
Loss = 7.0645e-02, PNorm = 65.7198, GNorm = 0.8392, lr_0 = 3.5358e-04
Loss = 6.8297e-02, PNorm = 65.7291, GNorm = 0.6455, lr_0 = 3.5333e-04
Loss = 6.9999e-02, PNorm = 65.7370, GNorm = 0.6214, lr_0 = 3.5309e-04
Loss = 5.8315e-02, PNorm = 65.7449, GNorm = 0.4893, lr_0 = 3.5285e-04
Loss = 6.8516e-02, PNorm = 65.7537, GNorm = 0.3924, lr_0 = 3.5261e-04
Loss = 6.3599e-02, PNorm = 65.7625, GNorm = 0.7005, lr_0 = 3.5237e-04
Loss = 6.0020e-02, PNorm = 65.7703, GNorm = 0.7319, lr_0 = 3.5212e-04
Loss = 7.0016e-02, PNorm = 65.7784, GNorm = 0.4521, lr_0 = 3.5188e-04
Loss = 6.7712e-02, PNorm = 65.7832, GNorm = 0.7451, lr_0 = 3.5164e-04
Loss = 7.7847e-02, PNorm = 65.7855, GNorm = 0.6463, lr_0 = 3.5140e-04
Loss = 6.6782e-02, PNorm = 65.7957, GNorm = 0.5232, lr_0 = 3.5116e-04
Loss = 5.8224e-02, PNorm = 65.8029, GNorm = 0.5954, lr_0 = 3.5092e-04
Loss = 7.4321e-02, PNorm = 65.8095, GNorm = 0.6466, lr_0 = 3.5068e-04
Loss = 6.9326e-02, PNorm = 65.8155, GNorm = 0.7978, lr_0 = 3.5044e-04
Loss = 7.2771e-02, PNorm = 65.8201, GNorm = 0.4904, lr_0 = 3.5020e-04
Loss = 7.3920e-02, PNorm = 65.8255, GNorm = 0.5100, lr_0 = 3.4996e-04
Loss = 7.6559e-02, PNorm = 65.8315, GNorm = 0.8862, lr_0 = 3.4972e-04
Loss = 7.1953e-02, PNorm = 65.8474, GNorm = 0.5490, lr_0 = 3.4948e-04
Loss = 7.2269e-02, PNorm = 65.8570, GNorm = 1.9329, lr_0 = 3.4924e-04
Loss = 7.4474e-02, PNorm = 65.8647, GNorm = 0.8986, lr_0 = 3.4900e-04
Loss = 7.2612e-02, PNorm = 65.8749, GNorm = 0.5563, lr_0 = 3.4876e-04
Loss = 8.1987e-02, PNorm = 65.8851, GNorm = 0.6214, lr_0 = 3.4852e-04
Loss = 6.8927e-02, PNorm = 65.8924, GNorm = 0.8505, lr_0 = 3.4828e-04
Loss = 7.5977e-02, PNorm = 65.8973, GNorm = 0.9108, lr_0 = 3.4805e-04
Loss = 7.3223e-02, PNorm = 65.9060, GNorm = 0.6347, lr_0 = 3.4781e-04
Loss = 6.3252e-02, PNorm = 65.9141, GNorm = 0.7519, lr_0 = 3.4757e-04
Loss = 7.6304e-02, PNorm = 65.9198, GNorm = 0.7311, lr_0 = 3.4733e-04
Loss = 8.4072e-02, PNorm = 65.9279, GNorm = 0.4890, lr_0 = 3.4709e-04
Loss = 7.0668e-02, PNorm = 65.9341, GNorm = 0.6250, lr_0 = 3.4686e-04
Loss = 7.2671e-02, PNorm = 65.9416, GNorm = 1.0442, lr_0 = 3.4662e-04
Loss = 6.8532e-02, PNorm = 65.9438, GNorm = 0.7232, lr_0 = 3.4638e-04
Loss = 8.5489e-02, PNorm = 65.9472, GNorm = 0.6417, lr_0 = 3.4614e-04
Loss = 7.6450e-02, PNorm = 65.9554, GNorm = 0.7183, lr_0 = 3.4591e-04
Loss = 8.3604e-02, PNorm = 65.9608, GNorm = 0.8845, lr_0 = 3.4567e-04
Loss = 7.2532e-02, PNorm = 65.9721, GNorm = 0.8352, lr_0 = 3.4543e-04
Loss = 6.8951e-02, PNorm = 65.9779, GNorm = 0.7241, lr_0 = 3.4520e-04
Loss = 7.8152e-02, PNorm = 65.9836, GNorm = 1.4621, lr_0 = 3.4496e-04
Loss = 8.0005e-02, PNorm = 65.9919, GNorm = 0.6389, lr_0 = 3.4472e-04
Loss = 7.7836e-02, PNorm = 66.0004, GNorm = 0.6237, lr_0 = 3.4449e-04
Loss = 5.8730e-02, PNorm = 66.0083, GNorm = 0.6507, lr_0 = 3.4425e-04
Loss = 6.9273e-02, PNorm = 66.0125, GNorm = 0.5864, lr_0 = 3.4402e-04
Loss = 7.3133e-02, PNorm = 66.0172, GNorm = 0.4883, lr_0 = 3.4378e-04
Loss = 6.3770e-02, PNorm = 66.0217, GNorm = 0.4006, lr_0 = 3.4354e-04
Loss = 7.0243e-02, PNorm = 66.0275, GNorm = 0.6027, lr_0 = 3.4331e-04
Validation mae = 0.392390
Epoch 15
Loss = 5.4899e-02, PNorm = 66.0333, GNorm = 0.5712, lr_0 = 3.4307e-04
Loss = 6.2557e-02, PNorm = 66.0387, GNorm = 0.5079, lr_0 = 3.4284e-04
Loss = 5.1721e-02, PNorm = 66.0471, GNorm = 0.5582, lr_0 = 3.4260e-04
Loss = 5.8717e-02, PNorm = 66.0562, GNorm = 0.5831, lr_0 = 3.4237e-04
Loss = 6.4471e-02, PNorm = 66.0619, GNorm = 0.9704, lr_0 = 3.4213e-04
Loss = 6.7728e-02, PNorm = 66.0708, GNorm = 0.4153, lr_0 = 3.4190e-04
Loss = 6.9405e-02, PNorm = 66.0809, GNorm = 0.7365, lr_0 = 3.4167e-04
Loss = 4.8776e-02, PNorm = 66.0882, GNorm = 0.4327, lr_0 = 3.4143e-04
Loss = 6.2049e-02, PNorm = 66.0939, GNorm = 0.8596, lr_0 = 3.4120e-04
Loss = 7.2062e-02, PNorm = 66.1014, GNorm = 0.7425, lr_0 = 3.4096e-04
Loss = 6.4623e-02, PNorm = 66.1068, GNorm = 0.5271, lr_0 = 3.4073e-04
Loss = 5.3939e-02, PNorm = 66.1114, GNorm = 0.6066, lr_0 = 3.4050e-04
Loss = 5.8051e-02, PNorm = 66.1178, GNorm = 0.4821, lr_0 = 3.4026e-04
Loss = 5.8216e-02, PNorm = 66.1216, GNorm = 0.6530, lr_0 = 3.4003e-04
Loss = 6.6755e-02, PNorm = 66.1282, GNorm = 0.9175, lr_0 = 3.3980e-04
Loss = 7.1340e-02, PNorm = 66.1377, GNorm = 0.5384, lr_0 = 3.3956e-04
Loss = 5.6108e-02, PNorm = 66.1469, GNorm = 0.5864, lr_0 = 3.3933e-04
Loss = 6.8647e-02, PNorm = 66.1527, GNorm = 0.6675, lr_0 = 3.3910e-04
Loss = 7.4017e-02, PNorm = 66.1609, GNorm = 0.5686, lr_0 = 3.3887e-04
Loss = 7.2963e-02, PNorm = 66.1709, GNorm = 0.4122, lr_0 = 3.3864e-04
Loss = 7.3105e-02, PNorm = 66.1799, GNorm = 0.6278, lr_0 = 3.3840e-04
Loss = 5.9342e-02, PNorm = 66.1863, GNorm = 0.4607, lr_0 = 3.3817e-04
Loss = 7.2607e-02, PNorm = 66.1972, GNorm = 0.6660, lr_0 = 3.3794e-04
Loss = 6.0882e-02, PNorm = 66.2074, GNorm = 0.8371, lr_0 = 3.3771e-04
Loss = 7.1378e-02, PNorm = 66.2160, GNorm = 0.5019, lr_0 = 3.3748e-04
Loss = 5.5698e-02, PNorm = 66.2226, GNorm = 0.9821, lr_0 = 3.3725e-04
Loss = 6.3443e-02, PNorm = 66.2295, GNorm = 0.5260, lr_0 = 3.3701e-04
Loss = 6.0794e-02, PNorm = 66.2352, GNorm = 1.0087, lr_0 = 3.3678e-04
Loss = 6.4411e-02, PNorm = 66.2426, GNorm = 0.5506, lr_0 = 3.3655e-04
Loss = 7.5465e-02, PNorm = 66.2509, GNorm = 0.4490, lr_0 = 3.3632e-04
Loss = 5.9559e-02, PNorm = 66.2571, GNorm = 0.4333, lr_0 = 3.3609e-04
Loss = 6.1134e-02, PNorm = 66.2629, GNorm = 0.5152, lr_0 = 3.3586e-04
Loss = 6.4285e-02, PNorm = 66.2692, GNorm = 0.5657, lr_0 = 3.3563e-04
Loss = 5.5056e-02, PNorm = 66.2804, GNorm = 0.5389, lr_0 = 3.3540e-04
Loss = 6.3244e-02, PNorm = 66.2890, GNorm = 0.6382, lr_0 = 3.3517e-04
Loss = 5.5296e-02, PNorm = 66.2949, GNorm = 0.4224, lr_0 = 3.3494e-04
Loss = 7.3689e-02, PNorm = 66.2992, GNorm = 0.5024, lr_0 = 3.3471e-04
Loss = 6.4164e-02, PNorm = 66.3049, GNorm = 0.5402, lr_0 = 3.3448e-04
Loss = 5.7888e-02, PNorm = 66.3109, GNorm = 0.5561, lr_0 = 3.3425e-04
Loss = 5.7585e-02, PNorm = 66.3150, GNorm = 0.6662, lr_0 = 3.3403e-04
Loss = 6.3359e-02, PNorm = 66.3204, GNorm = 0.5972, lr_0 = 3.3380e-04
Loss = 6.2901e-02, PNorm = 66.3261, GNorm = 0.7270, lr_0 = 3.3357e-04
Loss = 6.4692e-02, PNorm = 66.3345, GNorm = 0.5397, lr_0 = 3.3334e-04
Loss = 5.9180e-02, PNorm = 66.3417, GNorm = 0.7496, lr_0 = 3.3311e-04
Loss = 6.2434e-02, PNorm = 66.3480, GNorm = 0.5949, lr_0 = 3.3288e-04
Loss = 7.1511e-02, PNorm = 66.3529, GNorm = 0.6297, lr_0 = 3.3265e-04
Loss = 6.9400e-02, PNorm = 66.3570, GNorm = 0.7583, lr_0 = 3.3243e-04
Loss = 6.3914e-02, PNorm = 66.3642, GNorm = 0.4956, lr_0 = 3.3220e-04
Loss = 6.6337e-02, PNorm = 66.3725, GNorm = 0.5035, lr_0 = 3.3197e-04
Loss = 7.1606e-02, PNorm = 66.3794, GNorm = 0.9042, lr_0 = 3.3174e-04
Loss = 6.4943e-02, PNorm = 66.3845, GNorm = 0.7146, lr_0 = 3.3152e-04
Loss = 7.3185e-02, PNorm = 66.3900, GNorm = 0.6197, lr_0 = 3.3129e-04
Loss = 8.4591e-02, PNorm = 66.3970, GNorm = 1.0479, lr_0 = 3.3106e-04
Loss = 6.6592e-02, PNorm = 66.4073, GNorm = 0.7726, lr_0 = 3.3084e-04
Loss = 6.4349e-02, PNorm = 66.4165, GNorm = 0.7190, lr_0 = 3.3061e-04
Loss = 6.2859e-02, PNorm = 66.4251, GNorm = 0.4341, lr_0 = 3.3038e-04
Loss = 6.3588e-02, PNorm = 66.4315, GNorm = 0.5774, lr_0 = 3.3016e-04
Loss = 6.6336e-02, PNorm = 66.4351, GNorm = 0.8551, lr_0 = 3.2993e-04
Loss = 5.6993e-02, PNorm = 66.4401, GNorm = 0.7694, lr_0 = 3.2970e-04
Loss = 6.3560e-02, PNorm = 66.4446, GNorm = 0.5698, lr_0 = 3.2948e-04
Loss = 7.7093e-02, PNorm = 66.4509, GNorm = 0.9554, lr_0 = 3.2925e-04
Loss = 5.9059e-02, PNorm = 66.4542, GNorm = 0.5183, lr_0 = 3.2903e-04
Loss = 6.4814e-02, PNorm = 66.4591, GNorm = 0.6851, lr_0 = 3.2880e-04
Loss = 6.1502e-02, PNorm = 66.4656, GNorm = 0.6488, lr_0 = 3.2858e-04
Loss = 6.1082e-02, PNorm = 66.4741, GNorm = 0.9667, lr_0 = 3.2835e-04
Loss = 6.6312e-02, PNorm = 66.4821, GNorm = 0.6143, lr_0 = 3.2813e-04
Loss = 6.4389e-02, PNorm = 66.4901, GNorm = 0.7489, lr_0 = 3.2790e-04
Loss = 7.5365e-02, PNorm = 66.4979, GNorm = 0.7798, lr_0 = 3.2768e-04
Loss = 6.3757e-02, PNorm = 66.5018, GNorm = 0.6554, lr_0 = 3.2745e-04
Loss = 6.9758e-02, PNorm = 66.5074, GNorm = 0.6088, lr_0 = 3.2723e-04
Loss = 6.3384e-02, PNorm = 66.5121, GNorm = 0.5354, lr_0 = 3.2700e-04
Loss = 6.9475e-02, PNorm = 66.5173, GNorm = 0.6979, lr_0 = 3.2678e-04
Loss = 6.1542e-02, PNorm = 66.5238, GNorm = 0.5303, lr_0 = 3.2656e-04
Loss = 5.7998e-02, PNorm = 66.5298, GNorm = 0.5284, lr_0 = 3.2633e-04
Loss = 6.8568e-02, PNorm = 66.5363, GNorm = 0.5013, lr_0 = 3.2611e-04
Loss = 6.5060e-02, PNorm = 66.5433, GNorm = 0.5127, lr_0 = 3.2589e-04
Loss = 6.1096e-02, PNorm = 66.5497, GNorm = 0.5392, lr_0 = 3.2566e-04
Loss = 6.5367e-02, PNorm = 66.5569, GNorm = 0.4501, lr_0 = 3.2544e-04
Loss = 5.7123e-02, PNorm = 66.5664, GNorm = 0.6700, lr_0 = 3.2522e-04
Loss = 5.7175e-02, PNorm = 66.5730, GNorm = 0.6700, lr_0 = 3.2499e-04
Loss = 6.5851e-02, PNorm = 66.5801, GNorm = 0.4488, lr_0 = 3.2477e-04
Loss = 6.9513e-02, PNorm = 66.5833, GNorm = 0.7313, lr_0 = 3.2455e-04
Loss = 6.1684e-02, PNorm = 66.5871, GNorm = 0.4567, lr_0 = 3.2433e-04
Loss = 7.0075e-02, PNorm = 66.5916, GNorm = 0.6262, lr_0 = 3.2410e-04
Loss = 6.3836e-02, PNorm = 66.6007, GNorm = 0.6224, lr_0 = 3.2388e-04
Loss = 6.1248e-02, PNorm = 66.6056, GNorm = 0.6695, lr_0 = 3.2366e-04
Loss = 7.1008e-02, PNorm = 66.6107, GNorm = 0.8630, lr_0 = 3.2344e-04
Loss = 6.0181e-02, PNorm = 66.6159, GNorm = 0.5486, lr_0 = 3.2322e-04
Loss = 6.7039e-02, PNorm = 66.6194, GNorm = 0.5114, lr_0 = 3.2300e-04
Loss = 6.4177e-02, PNorm = 66.6244, GNorm = 0.6184, lr_0 = 3.2277e-04
Loss = 7.4382e-02, PNorm = 66.6293, GNorm = 0.6411, lr_0 = 3.2255e-04
Loss = 6.6036e-02, PNorm = 66.6343, GNorm = 0.6685, lr_0 = 3.2233e-04
Loss = 6.5503e-02, PNorm = 66.6390, GNorm = 0.4634, lr_0 = 3.2211e-04
Loss = 6.6801e-02, PNorm = 66.6456, GNorm = 0.5334, lr_0 = 3.2189e-04
Loss = 6.4203e-02, PNorm = 66.6535, GNorm = 0.7334, lr_0 = 3.2167e-04
Loss = 7.7395e-02, PNorm = 66.6597, GNorm = 1.0066, lr_0 = 3.2145e-04
Loss = 5.9881e-02, PNorm = 66.6649, GNorm = 0.6791, lr_0 = 3.2123e-04
Loss = 6.5558e-02, PNorm = 66.6704, GNorm = 0.5216, lr_0 = 3.2101e-04
Loss = 6.0396e-02, PNorm = 66.6778, GNorm = 0.5609, lr_0 = 3.2079e-04
Loss = 6.7557e-02, PNorm = 66.6852, GNorm = 0.4696, lr_0 = 3.2057e-04
Loss = 7.1502e-02, PNorm = 66.6942, GNorm = 0.5952, lr_0 = 3.2035e-04
Loss = 6.6150e-02, PNorm = 66.7007, GNorm = 0.3416, lr_0 = 3.2013e-04
Loss = 7.3240e-02, PNorm = 66.7062, GNorm = 0.8633, lr_0 = 3.1991e-04
Loss = 6.9176e-02, PNorm = 66.7111, GNorm = 0.5966, lr_0 = 3.1969e-04
Loss = 7.3887e-02, PNorm = 66.7180, GNorm = 0.5736, lr_0 = 3.1947e-04
Loss = 7.0848e-02, PNorm = 66.7244, GNorm = 0.6935, lr_0 = 3.1925e-04
Loss = 6.0917e-02, PNorm = 66.7296, GNorm = 0.6981, lr_0 = 3.1904e-04
Loss = 7.7342e-02, PNorm = 66.7369, GNorm = 0.9158, lr_0 = 3.1882e-04
Loss = 6.3982e-02, PNorm = 66.7445, GNorm = 0.7215, lr_0 = 3.1860e-04
Loss = 7.0620e-02, PNorm = 66.7499, GNorm = 0.5998, lr_0 = 3.1838e-04
Loss = 6.8483e-02, PNorm = 66.7549, GNorm = 0.5629, lr_0 = 3.1816e-04
Loss = 5.6240e-02, PNorm = 66.7596, GNorm = 0.4927, lr_0 = 3.1794e-04
Loss = 6.4128e-02, PNorm = 66.7642, GNorm = 0.4862, lr_0 = 3.1773e-04
Loss = 6.8957e-02, PNorm = 66.7710, GNorm = 0.5745, lr_0 = 3.1751e-04
Loss = 6.2164e-02, PNorm = 66.7776, GNorm = 0.8516, lr_0 = 3.1729e-04
Loss = 6.4593e-02, PNorm = 66.7834, GNorm = 0.6445, lr_0 = 3.1707e-04
Loss = 6.1303e-02, PNorm = 66.7875, GNorm = 0.5205, lr_0 = 3.1686e-04
Loss = 6.3924e-02, PNorm = 66.7940, GNorm = 0.5311, lr_0 = 3.1664e-04
Loss = 6.8062e-02, PNorm = 66.8020, GNorm = 0.5441, lr_0 = 3.1642e-04
Loss = 5.9159e-02, PNorm = 66.8074, GNorm = 0.5124, lr_0 = 3.1621e-04
Validation mae = 0.388994
Epoch 16
Loss = 5.5055e-02, PNorm = 66.8120, GNorm = 0.5511, lr_0 = 3.1599e-04
Loss = 6.2948e-02, PNorm = 66.8181, GNorm = 0.5032, lr_0 = 3.1577e-04
Loss = 5.2729e-02, PNorm = 66.8238, GNorm = 0.8190, lr_0 = 3.1556e-04
Loss = 6.6751e-02, PNorm = 66.8325, GNorm = 0.4766, lr_0 = 3.1534e-04
Loss = 6.2207e-02, PNorm = 66.8405, GNorm = 0.6373, lr_0 = 3.1512e-04
Loss = 7.3946e-02, PNorm = 66.8478, GNorm = 0.5122, lr_0 = 3.1491e-04
Loss = 5.2605e-02, PNorm = 66.8543, GNorm = 0.5038, lr_0 = 3.1469e-04
Loss = 5.8006e-02, PNorm = 66.8609, GNorm = 0.6062, lr_0 = 3.1448e-04
Loss = 5.4925e-02, PNorm = 66.8682, GNorm = 0.8723, lr_0 = 3.1426e-04
Loss = 5.2599e-02, PNorm = 66.8729, GNorm = 0.6971, lr_0 = 3.1405e-04
Loss = 5.7252e-02, PNorm = 66.8779, GNorm = 0.3693, lr_0 = 3.1383e-04
Loss = 6.0263e-02, PNorm = 66.8842, GNorm = 0.8111, lr_0 = 3.1362e-04
Loss = 5.9849e-02, PNorm = 66.8899, GNorm = 0.6323, lr_0 = 3.1340e-04
Loss = 5.2098e-02, PNorm = 66.8975, GNorm = 0.5186, lr_0 = 3.1319e-04
Loss = 5.7674e-02, PNorm = 66.9038, GNorm = 0.7974, lr_0 = 3.1297e-04
Loss = 5.7124e-02, PNorm = 66.9094, GNorm = 0.6605, lr_0 = 3.1276e-04
Loss = 6.2979e-02, PNorm = 66.9154, GNorm = 0.5749, lr_0 = 3.1254e-04
Loss = 6.0598e-02, PNorm = 66.9214, GNorm = 0.4876, lr_0 = 3.1233e-04
Loss = 6.2032e-02, PNorm = 66.9267, GNorm = 0.6727, lr_0 = 3.1212e-04
Loss = 5.4033e-02, PNorm = 66.9335, GNorm = 0.5852, lr_0 = 3.1190e-04
Loss = 5.7193e-02, PNorm = 66.9392, GNorm = 0.5309, lr_0 = 3.1169e-04
Loss = 7.2849e-02, PNorm = 66.9451, GNorm = 0.5957, lr_0 = 3.1147e-04
Loss = 5.7220e-02, PNorm = 66.9504, GNorm = 0.4229, lr_0 = 3.1126e-04
Loss = 6.1080e-02, PNorm = 66.9573, GNorm = 0.7138, lr_0 = 3.1105e-04
Loss = 6.5667e-02, PNorm = 66.9635, GNorm = 0.6425, lr_0 = 3.1083e-04
Loss = 6.1760e-02, PNorm = 66.9684, GNorm = 0.6864, lr_0 = 3.1062e-04
Loss = 6.8632e-02, PNorm = 66.9744, GNorm = 0.6029, lr_0 = 3.1041e-04
Loss = 6.0424e-02, PNorm = 66.9803, GNorm = 0.7811, lr_0 = 3.1020e-04
Loss = 5.8525e-02, PNorm = 66.9881, GNorm = 0.4911, lr_0 = 3.0998e-04
Loss = 5.5429e-02, PNorm = 66.9947, GNorm = 0.4586, lr_0 = 3.0977e-04
Loss = 7.0800e-02, PNorm = 67.0031, GNorm = 0.7189, lr_0 = 3.0956e-04
Loss = 5.3809e-02, PNorm = 67.0117, GNorm = 0.7597, lr_0 = 3.0935e-04
Loss = 6.5392e-02, PNorm = 67.0188, GNorm = 0.6772, lr_0 = 3.0914e-04
Loss = 6.1747e-02, PNorm = 67.0273, GNorm = 0.4508, lr_0 = 3.0892e-04
Loss = 5.1022e-02, PNorm = 67.0350, GNorm = 0.6484, lr_0 = 3.0871e-04
Loss = 5.8915e-02, PNorm = 67.0414, GNorm = 0.4935, lr_0 = 3.0850e-04
Loss = 6.9895e-02, PNorm = 67.0484, GNorm = 0.4636, lr_0 = 3.0829e-04
Loss = 6.6604e-02, PNorm = 67.0549, GNorm = 1.1454, lr_0 = 3.0808e-04
Loss = 5.4693e-02, PNorm = 67.0618, GNorm = 0.5534, lr_0 = 3.0787e-04
Loss = 6.5847e-02, PNorm = 67.0695, GNorm = 0.6677, lr_0 = 3.0766e-04
Loss = 5.5196e-02, PNorm = 67.0745, GNorm = 0.6406, lr_0 = 3.0745e-04
Loss = 5.6792e-02, PNorm = 67.0790, GNorm = 0.4871, lr_0 = 3.0723e-04
Loss = 6.3597e-02, PNorm = 67.0853, GNorm = 0.5688, lr_0 = 3.0702e-04
Loss = 6.1940e-02, PNorm = 67.0914, GNorm = 0.5188, lr_0 = 3.0681e-04
Loss = 5.5945e-02, PNorm = 67.0965, GNorm = 0.5677, lr_0 = 3.0660e-04
Loss = 5.7174e-02, PNorm = 67.1019, GNorm = 0.5881, lr_0 = 3.0639e-04
Loss = 6.1912e-02, PNorm = 67.1083, GNorm = 0.8060, lr_0 = 3.0618e-04
Loss = 5.7481e-02, PNorm = 67.1140, GNorm = 0.4479, lr_0 = 3.0597e-04
Loss = 6.3805e-02, PNorm = 67.1193, GNorm = 0.4481, lr_0 = 3.0576e-04
Loss = 6.0570e-02, PNorm = 67.1246, GNorm = 0.5163, lr_0 = 3.0555e-04
Loss = 5.6729e-02, PNorm = 67.1316, GNorm = 0.6462, lr_0 = 3.0535e-04
Loss = 5.7736e-02, PNorm = 67.1391, GNorm = 0.4761, lr_0 = 3.0514e-04
Loss = 5.9648e-02, PNorm = 67.1452, GNorm = 0.6055, lr_0 = 3.0493e-04
Loss = 6.0858e-02, PNorm = 67.1498, GNorm = 0.7747, lr_0 = 3.0472e-04
Loss = 6.4761e-02, PNorm = 67.1563, GNorm = 0.4931, lr_0 = 3.0451e-04
Loss = 7.1612e-02, PNorm = 67.1649, GNorm = 0.9240, lr_0 = 3.0430e-04
Loss = 5.2412e-02, PNorm = 67.1725, GNorm = 0.4757, lr_0 = 3.0409e-04
Loss = 5.5558e-02, PNorm = 67.1773, GNorm = 0.5012, lr_0 = 3.0388e-04
Loss = 6.4585e-02, PNorm = 67.1819, GNorm = 0.5938, lr_0 = 3.0368e-04
Loss = 5.3931e-02, PNorm = 67.1869, GNorm = 0.8096, lr_0 = 3.0347e-04
Loss = 6.0798e-02, PNorm = 67.1931, GNorm = 0.4887, lr_0 = 3.0326e-04
Loss = 6.5753e-02, PNorm = 67.1996, GNorm = 0.4236, lr_0 = 3.0305e-04
Loss = 6.5787e-02, PNorm = 67.2058, GNorm = 0.4444, lr_0 = 3.0284e-04
Loss = 6.7731e-02, PNorm = 67.2090, GNorm = 0.4656, lr_0 = 3.0264e-04
Loss = 6.4127e-02, PNorm = 67.2150, GNorm = 0.9520, lr_0 = 3.0243e-04
Loss = 5.4430e-02, PNorm = 67.2196, GNorm = 0.4054, lr_0 = 3.0222e-04
Loss = 5.4871e-02, PNorm = 67.2233, GNorm = 0.6335, lr_0 = 3.0202e-04
Loss = 5.7943e-02, PNorm = 67.2274, GNorm = 0.7166, lr_0 = 3.0181e-04
Loss = 6.9980e-02, PNorm = 67.2288, GNorm = 0.9910, lr_0 = 3.0160e-04
Loss = 6.2801e-02, PNorm = 67.2345, GNorm = 1.0349, lr_0 = 3.0140e-04
Loss = 6.0068e-02, PNorm = 67.2434, GNorm = 0.6269, lr_0 = 3.0119e-04
Loss = 6.0161e-02, PNorm = 67.2510, GNorm = 0.5606, lr_0 = 3.0098e-04
Loss = 6.1647e-02, PNorm = 67.2551, GNorm = 0.7083, lr_0 = 3.0078e-04
Loss = 6.7290e-02, PNorm = 67.2617, GNorm = 0.7139, lr_0 = 3.0057e-04
Loss = 6.3273e-02, PNorm = 67.2677, GNorm = 0.4707, lr_0 = 3.0036e-04
Loss = 6.3857e-02, PNorm = 67.2720, GNorm = 0.6371, lr_0 = 3.0016e-04
Loss = 6.2095e-02, PNorm = 67.2743, GNorm = 0.7801, lr_0 = 2.9995e-04
Loss = 6.5852e-02, PNorm = 67.2802, GNorm = 0.9132, lr_0 = 2.9975e-04
Loss = 5.5782e-02, PNorm = 67.2898, GNorm = 0.5282, lr_0 = 2.9954e-04
Loss = 5.8973e-02, PNorm = 67.2976, GNorm = 0.4855, lr_0 = 2.9934e-04
Loss = 5.6595e-02, PNorm = 67.3025, GNorm = 1.0758, lr_0 = 2.9913e-04
Loss = 5.7217e-02, PNorm = 67.3084, GNorm = 0.4774, lr_0 = 2.9893e-04
Loss = 5.7860e-02, PNorm = 67.3158, GNorm = 0.7079, lr_0 = 2.9872e-04
Loss = 5.4525e-02, PNorm = 67.3212, GNorm = 0.4942, lr_0 = 2.9852e-04
Loss = 5.8775e-02, PNorm = 67.3279, GNorm = 0.8373, lr_0 = 2.9831e-04
Loss = 6.1423e-02, PNorm = 67.3348, GNorm = 0.4692, lr_0 = 2.9811e-04
Loss = 5.0664e-02, PNorm = 67.3400, GNorm = 0.4392, lr_0 = 2.9790e-04
Loss = 5.6535e-02, PNorm = 67.3460, GNorm = 0.5070, lr_0 = 2.9770e-04
Loss = 6.1094e-02, PNorm = 67.3521, GNorm = 0.4735, lr_0 = 2.9750e-04
Loss = 6.2588e-02, PNorm = 67.3573, GNorm = 0.6646, lr_0 = 2.9729e-04
Loss = 6.0298e-02, PNorm = 67.3641, GNorm = 0.7342, lr_0 = 2.9709e-04
Loss = 5.9546e-02, PNorm = 67.3696, GNorm = 0.5980, lr_0 = 2.9689e-04
Loss = 6.6129e-02, PNorm = 67.3745, GNorm = 0.6258, lr_0 = 2.9668e-04
Loss = 5.3555e-02, PNorm = 67.3786, GNorm = 0.6613, lr_0 = 2.9648e-04
Loss = 5.9801e-02, PNorm = 67.3850, GNorm = 0.7102, lr_0 = 2.9628e-04
Loss = 6.3377e-02, PNorm = 67.3929, GNorm = 0.6807, lr_0 = 2.9607e-04
Loss = 6.8155e-02, PNorm = 67.3991, GNorm = 0.8931, lr_0 = 2.9587e-04
Loss = 6.5656e-02, PNorm = 67.4032, GNorm = 0.6005, lr_0 = 2.9567e-04
Loss = 6.4158e-02, PNorm = 67.4087, GNorm = 0.4183, lr_0 = 2.9546e-04
Loss = 6.7062e-02, PNorm = 67.4159, GNorm = 0.4724, lr_0 = 2.9526e-04
Loss = 5.6788e-02, PNorm = 67.4201, GNorm = 0.5556, lr_0 = 2.9506e-04
Loss = 6.2929e-02, PNorm = 67.4251, GNorm = 0.6880, lr_0 = 2.9486e-04
Loss = 6.5043e-02, PNorm = 67.4290, GNorm = 0.5540, lr_0 = 2.9466e-04
Loss = 5.6843e-02, PNorm = 67.4340, GNorm = 0.4510, lr_0 = 2.9445e-04
Loss = 6.6335e-02, PNorm = 67.4380, GNorm = 0.7645, lr_0 = 2.9425e-04
Loss = 7.2451e-02, PNorm = 67.4440, GNorm = 0.8301, lr_0 = 2.9405e-04
Loss = 7.2619e-02, PNorm = 67.4507, GNorm = 0.4953, lr_0 = 2.9385e-04
Loss = 5.9645e-02, PNorm = 67.4584, GNorm = 0.4199, lr_0 = 2.9365e-04
Loss = 6.4197e-02, PNorm = 67.4644, GNorm = 0.6877, lr_0 = 2.9345e-04
Loss = 7.1207e-02, PNorm = 67.4719, GNorm = 0.7854, lr_0 = 2.9325e-04
Loss = 6.4546e-02, PNorm = 67.4778, GNorm = 0.5972, lr_0 = 2.9305e-04
Loss = 6.2258e-02, PNorm = 67.4838, GNorm = 0.4639, lr_0 = 2.9284e-04
Loss = 6.2858e-02, PNorm = 67.4887, GNorm = 0.5489, lr_0 = 2.9264e-04
Loss = 6.0241e-02, PNorm = 67.4970, GNorm = 0.7462, lr_0 = 2.9244e-04
Loss = 5.5382e-02, PNorm = 67.5042, GNorm = 0.5660, lr_0 = 2.9224e-04
Loss = 6.3440e-02, PNorm = 67.5103, GNorm = 0.6722, lr_0 = 2.9204e-04
Loss = 5.7202e-02, PNorm = 67.5147, GNorm = 0.6994, lr_0 = 2.9184e-04
Loss = 6.0718e-02, PNorm = 67.5196, GNorm = 0.5422, lr_0 = 2.9164e-04
Loss = 5.7994e-02, PNorm = 67.5243, GNorm = 0.7283, lr_0 = 2.9144e-04
Loss = 6.8113e-02, PNorm = 67.5276, GNorm = 0.7680, lr_0 = 2.9124e-04
Validation mae = 0.391312
Epoch 17
Loss = 5.8029e-02, PNorm = 67.5322, GNorm = 0.5981, lr_0 = 2.9104e-04
Loss = 5.6285e-02, PNorm = 67.5398, GNorm = 0.8974, lr_0 = 2.9084e-04
Loss = 5.4286e-02, PNorm = 67.5465, GNorm = 0.7021, lr_0 = 2.9065e-04
Loss = 5.9868e-02, PNorm = 67.5532, GNorm = 0.5481, lr_0 = 2.9045e-04
Loss = 5.4684e-02, PNorm = 67.5595, GNorm = 0.4803, lr_0 = 2.9025e-04
Loss = 5.3515e-02, PNorm = 67.5680, GNorm = 0.4816, lr_0 = 2.9005e-04
Loss = 5.6185e-02, PNorm = 67.5759, GNorm = 0.4431, lr_0 = 2.8985e-04
Loss = 5.5674e-02, PNorm = 67.5807, GNorm = 1.0145, lr_0 = 2.8965e-04
Loss = 5.1069e-02, PNorm = 67.5887, GNorm = 0.4577, lr_0 = 2.8945e-04
Loss = 4.8253e-02, PNorm = 67.5963, GNorm = 0.6951, lr_0 = 2.8925e-04
Loss = 5.6693e-02, PNorm = 67.6003, GNorm = 0.4490, lr_0 = 2.8906e-04
Loss = 5.8402e-02, PNorm = 67.6058, GNorm = 0.5422, lr_0 = 2.8886e-04
Loss = 5.8011e-02, PNorm = 67.6114, GNorm = 0.7808, lr_0 = 2.8866e-04
Loss = 5.2848e-02, PNorm = 67.6172, GNorm = 0.4408, lr_0 = 2.8846e-04
Loss = 4.9654e-02, PNorm = 67.6209, GNorm = 0.6299, lr_0 = 2.8826e-04
Loss = 6.0012e-02, PNorm = 67.6273, GNorm = 0.8184, lr_0 = 2.8807e-04
Loss = 4.2172e-02, PNorm = 67.6333, GNorm = 0.4521, lr_0 = 2.8787e-04
Loss = 4.6475e-02, PNorm = 67.6373, GNorm = 0.8166, lr_0 = 2.8767e-04
Loss = 4.8275e-02, PNorm = 67.6425, GNorm = 0.5525, lr_0 = 2.8748e-04
Loss = 5.1278e-02, PNorm = 67.6454, GNorm = 0.6713, lr_0 = 2.8728e-04
Loss = 5.5996e-02, PNorm = 67.6473, GNorm = 0.4978, lr_0 = 2.8708e-04
Loss = 5.6510e-02, PNorm = 67.6520, GNorm = 0.5414, lr_0 = 2.8689e-04
Loss = 6.3781e-02, PNorm = 67.6595, GNorm = 0.5083, lr_0 = 2.8669e-04
Loss = 6.4256e-02, PNorm = 67.6677, GNorm = 0.7836, lr_0 = 2.8649e-04
Loss = 5.1317e-02, PNorm = 67.6740, GNorm = 0.4285, lr_0 = 2.8630e-04
Loss = 5.4267e-02, PNorm = 67.6794, GNorm = 0.9761, lr_0 = 2.8610e-04
Loss = 5.1844e-02, PNorm = 67.6848, GNorm = 0.4122, lr_0 = 2.8590e-04
Loss = 5.6108e-02, PNorm = 67.6895, GNorm = 0.4902, lr_0 = 2.8571e-04
Loss = 6.3794e-02, PNorm = 67.6938, GNorm = 0.5470, lr_0 = 2.8551e-04
Loss = 5.7165e-02, PNorm = 67.6987, GNorm = 0.7818, lr_0 = 2.8532e-04
Loss = 6.4689e-02, PNorm = 67.7045, GNorm = 1.0221, lr_0 = 2.8512e-04
Loss = 6.4467e-02, PNorm = 67.7096, GNorm = 0.7844, lr_0 = 2.8493e-04
Loss = 5.9366e-02, PNorm = 67.7141, GNorm = 0.5518, lr_0 = 2.8473e-04
Loss = 5.2663e-02, PNorm = 67.7202, GNorm = 0.8727, lr_0 = 2.8454e-04
Loss = 4.8167e-02, PNorm = 67.7258, GNorm = 0.8739, lr_0 = 2.8434e-04
Loss = 5.0119e-02, PNorm = 67.7315, GNorm = 0.5532, lr_0 = 2.8415e-04
Loss = 5.8393e-02, PNorm = 67.7382, GNorm = 0.4797, lr_0 = 2.8395e-04
Loss = 5.5279e-02, PNorm = 67.7436, GNorm = 0.9762, lr_0 = 2.8376e-04
Loss = 5.3284e-02, PNorm = 67.7466, GNorm = 0.5409, lr_0 = 2.8356e-04
Loss = 5.4807e-02, PNorm = 67.7524, GNorm = 0.4180, lr_0 = 2.8337e-04
Loss = 6.0541e-02, PNorm = 67.7607, GNorm = 0.4968, lr_0 = 2.8317e-04
Loss = 4.9960e-02, PNorm = 67.7665, GNorm = 0.8441, lr_0 = 2.8298e-04
Loss = 6.2088e-02, PNorm = 67.7728, GNorm = 0.4558, lr_0 = 2.8279e-04
Loss = 4.9496e-02, PNorm = 67.7763, GNorm = 0.6806, lr_0 = 2.8259e-04
Loss = 4.8247e-02, PNorm = 67.7815, GNorm = 0.5620, lr_0 = 2.8240e-04
Loss = 6.0071e-02, PNorm = 67.7838, GNorm = 0.5388, lr_0 = 2.8221e-04
Loss = 5.1030e-02, PNorm = 67.7857, GNorm = 0.3437, lr_0 = 2.8201e-04
Loss = 4.9907e-02, PNorm = 67.7893, GNorm = 0.5360, lr_0 = 2.8182e-04
Loss = 5.5297e-02, PNorm = 67.7960, GNorm = 0.5315, lr_0 = 2.8163e-04
Loss = 6.0490e-02, PNorm = 67.8006, GNorm = 0.5319, lr_0 = 2.8143e-04
Loss = 5.4128e-02, PNorm = 67.8059, GNorm = 0.6673, lr_0 = 2.8124e-04
Loss = 5.2312e-02, PNorm = 67.8117, GNorm = 0.5457, lr_0 = 2.8105e-04
Loss = 5.4427e-02, PNorm = 67.8169, GNorm = 0.6685, lr_0 = 2.8085e-04
Loss = 4.9507e-02, PNorm = 67.8223, GNorm = 0.5297, lr_0 = 2.8066e-04
Loss = 5.5997e-02, PNorm = 67.8263, GNorm = 0.5238, lr_0 = 2.8047e-04
Loss = 5.4471e-02, PNorm = 67.8303, GNorm = 0.6105, lr_0 = 2.8028e-04
Loss = 5.8907e-02, PNorm = 67.8357, GNorm = 0.5920, lr_0 = 2.8009e-04
Loss = 5.3243e-02, PNorm = 67.8387, GNorm = 0.4772, lr_0 = 2.7989e-04
Loss = 5.6228e-02, PNorm = 67.8422, GNorm = 0.6189, lr_0 = 2.7970e-04
Loss = 5.5845e-02, PNorm = 67.8453, GNorm = 0.5753, lr_0 = 2.7951e-04
Loss = 6.1460e-02, PNorm = 67.8493, GNorm = 0.6577, lr_0 = 2.7932e-04
Loss = 5.9327e-02, PNorm = 67.8550, GNorm = 0.5230, lr_0 = 2.7913e-04
Loss = 5.5969e-02, PNorm = 67.8591, GNorm = 0.8071, lr_0 = 2.7894e-04
Loss = 6.1340e-02, PNorm = 67.8638, GNorm = 0.4798, lr_0 = 2.7875e-04
Loss = 5.6920e-02, PNorm = 67.8722, GNorm = 0.4805, lr_0 = 2.7855e-04
Loss = 6.1474e-02, PNorm = 67.8778, GNorm = 1.4539, lr_0 = 2.7836e-04
Loss = 5.8269e-02, PNorm = 67.8817, GNorm = 1.1385, lr_0 = 2.7817e-04
Loss = 5.0875e-02, PNorm = 67.8869, GNorm = 0.5106, lr_0 = 2.7798e-04
Loss = 6.6372e-02, PNorm = 67.8924, GNorm = 0.6014, lr_0 = 2.7779e-04
Loss = 7.2870e-02, PNorm = 67.8971, GNorm = 0.5656, lr_0 = 2.7760e-04
Loss = 4.8190e-02, PNorm = 67.9041, GNorm = 0.5021, lr_0 = 2.7741e-04
Loss = 6.7596e-02, PNorm = 67.9095, GNorm = 0.8478, lr_0 = 2.7722e-04
Loss = 5.9020e-02, PNorm = 67.9129, GNorm = 0.5182, lr_0 = 2.7703e-04
Loss = 6.6455e-02, PNorm = 67.9185, GNorm = 0.4347, lr_0 = 2.7684e-04
Loss = 7.3719e-02, PNorm = 67.9256, GNorm = 0.5957, lr_0 = 2.7665e-04
Loss = 5.5549e-02, PNorm = 67.9323, GNorm = 0.5916, lr_0 = 2.7646e-04
Loss = 5.3447e-02, PNorm = 67.9353, GNorm = 0.5344, lr_0 = 2.7627e-04
Loss = 5.9369e-02, PNorm = 67.9384, GNorm = 0.8353, lr_0 = 2.7608e-04
Loss = 5.5100e-02, PNorm = 67.9430, GNorm = 0.4529, lr_0 = 2.7590e-04
Loss = 6.6401e-02, PNorm = 67.9487, GNorm = 0.7295, lr_0 = 2.7571e-04
Loss = 5.7278e-02, PNorm = 67.9536, GNorm = 0.3449, lr_0 = 2.7552e-04
Loss = 5.2937e-02, PNorm = 67.9587, GNorm = 0.4686, lr_0 = 2.7533e-04
Loss = 5.3794e-02, PNorm = 67.9640, GNorm = 0.6792, lr_0 = 2.7514e-04
Loss = 5.7090e-02, PNorm = 67.9716, GNorm = 0.5683, lr_0 = 2.7495e-04
Loss = 5.5907e-02, PNorm = 67.9778, GNorm = 0.5557, lr_0 = 2.7476e-04
Loss = 7.2335e-02, PNorm = 67.9829, GNorm = 0.5457, lr_0 = 2.7457e-04
Loss = 6.8970e-02, PNorm = 67.9894, GNorm = 0.6570, lr_0 = 2.7439e-04
Loss = 5.7484e-02, PNorm = 67.9957, GNorm = 0.7241, lr_0 = 2.7420e-04
Loss = 6.4085e-02, PNorm = 68.0009, GNorm = 0.5573, lr_0 = 2.7401e-04
Loss = 5.4104e-02, PNorm = 68.0076, GNorm = 0.3483, lr_0 = 2.7382e-04
Loss = 5.2797e-02, PNorm = 68.0111, GNorm = 0.4293, lr_0 = 2.7364e-04
Loss = 6.4498e-02, PNorm = 68.0159, GNorm = 0.5775, lr_0 = 2.7345e-04
Loss = 5.6919e-02, PNorm = 68.0221, GNorm = 0.6944, lr_0 = 2.7326e-04
Loss = 6.1577e-02, PNorm = 68.0281, GNorm = 0.5380, lr_0 = 2.7307e-04
Loss = 4.1897e-02, PNorm = 68.0315, GNorm = 0.3691, lr_0 = 2.7289e-04
Loss = 5.5976e-02, PNorm = 68.0351, GNorm = 0.6772, lr_0 = 2.7270e-04
Loss = 5.6233e-02, PNorm = 68.0415, GNorm = 0.9293, lr_0 = 2.7251e-04
Loss = 5.2220e-02, PNorm = 68.0447, GNorm = 0.7963, lr_0 = 2.7233e-04
Loss = 7.0884e-02, PNorm = 68.0490, GNorm = 0.4570, lr_0 = 2.7214e-04
Loss = 6.3223e-02, PNorm = 68.0537, GNorm = 0.8569, lr_0 = 2.7195e-04
Loss = 5.1037e-02, PNorm = 68.0590, GNorm = 0.4847, lr_0 = 2.7177e-04
Loss = 5.9679e-02, PNorm = 68.0664, GNorm = 0.5863, lr_0 = 2.7158e-04
Loss = 6.2760e-02, PNorm = 68.0723, GNorm = 0.7802, lr_0 = 2.7139e-04
Loss = 5.7072e-02, PNorm = 68.0764, GNorm = 0.5497, lr_0 = 2.7121e-04
Loss = 6.3401e-02, PNorm = 68.0817, GNorm = 0.6396, lr_0 = 2.7102e-04
Loss = 5.2634e-02, PNorm = 68.0870, GNorm = 0.4917, lr_0 = 2.7084e-04
Loss = 5.5333e-02, PNorm = 68.0899, GNorm = 0.4326, lr_0 = 2.7065e-04
Loss = 5.7296e-02, PNorm = 68.0924, GNorm = 0.5795, lr_0 = 2.7047e-04
Loss = 5.5411e-02, PNorm = 68.0987, GNorm = 0.7797, lr_0 = 2.7028e-04
Loss = 5.1269e-02, PNorm = 68.1015, GNorm = 0.5419, lr_0 = 2.7010e-04
Loss = 5.4248e-02, PNorm = 68.1056, GNorm = 0.5328, lr_0 = 2.6991e-04
Loss = 5.4638e-02, PNorm = 68.1105, GNorm = 0.4404, lr_0 = 2.6973e-04
Loss = 5.8937e-02, PNorm = 68.1160, GNorm = 0.5233, lr_0 = 2.6954e-04
Loss = 5.9306e-02, PNorm = 68.1201, GNorm = 0.8215, lr_0 = 2.6936e-04
Loss = 6.4046e-02, PNorm = 68.1251, GNorm = 0.9289, lr_0 = 2.6917e-04
Loss = 5.6665e-02, PNorm = 68.1308, GNorm = 0.4570, lr_0 = 2.6899e-04
Loss = 6.3563e-02, PNorm = 68.1353, GNorm = 0.5517, lr_0 = 2.6880e-04
Loss = 5.6604e-02, PNorm = 68.1373, GNorm = 0.6183, lr_0 = 2.6862e-04
Loss = 6.1806e-02, PNorm = 68.1416, GNorm = 0.4795, lr_0 = 2.6844e-04
Loss = 6.4029e-02, PNorm = 68.1492, GNorm = 0.5280, lr_0 = 2.6825e-04
Validation mae = 0.389981
Epoch 18
Loss = 5.2531e-02, PNorm = 68.1557, GNorm = 0.3958, lr_0 = 2.6807e-04
Loss = 4.9202e-02, PNorm = 68.1622, GNorm = 0.8375, lr_0 = 2.6788e-04
Loss = 5.8062e-02, PNorm = 68.1666, GNorm = 0.5219, lr_0 = 2.6770e-04
Loss = 4.7479e-02, PNorm = 68.1725, GNorm = 0.5981, lr_0 = 2.6752e-04
Loss = 4.9226e-02, PNorm = 68.1795, GNorm = 0.4366, lr_0 = 2.6733e-04
Loss = 4.8760e-02, PNorm = 68.1857, GNorm = 0.5591, lr_0 = 2.6715e-04
Loss = 4.9945e-02, PNorm = 68.1922, GNorm = 0.5541, lr_0 = 2.6697e-04
Loss = 4.7554e-02, PNorm = 68.1972, GNorm = 0.4675, lr_0 = 2.6678e-04
Loss = 4.6637e-02, PNorm = 68.2016, GNorm = 0.5038, lr_0 = 2.6660e-04
Loss = 5.8141e-02, PNorm = 68.2056, GNorm = 0.3818, lr_0 = 2.6642e-04
Loss = 4.9558e-02, PNorm = 68.2114, GNorm = 0.4428, lr_0 = 2.6624e-04
Loss = 5.8972e-02, PNorm = 68.2187, GNorm = 0.5788, lr_0 = 2.6605e-04
Loss = 5.7731e-02, PNorm = 68.2259, GNorm = 0.7534, lr_0 = 2.6587e-04
Loss = 5.8955e-02, PNorm = 68.2320, GNorm = 0.5304, lr_0 = 2.6569e-04
Loss = 5.3674e-02, PNorm = 68.2373, GNorm = 0.6454, lr_0 = 2.6551e-04
Loss = 4.7129e-02, PNorm = 68.2435, GNorm = 0.5639, lr_0 = 2.6533e-04
Loss = 5.5316e-02, PNorm = 68.2492, GNorm = 0.5202, lr_0 = 2.6514e-04
Loss = 5.0565e-02, PNorm = 68.2547, GNorm = 0.8782, lr_0 = 2.6496e-04
Loss = 5.1599e-02, PNorm = 68.2604, GNorm = 0.4800, lr_0 = 2.6478e-04
Loss = 5.0191e-02, PNorm = 68.2655, GNorm = 0.5655, lr_0 = 2.6460e-04
Loss = 5.4974e-02, PNorm = 68.2702, GNorm = 0.4827, lr_0 = 2.6442e-04
Loss = 4.4019e-02, PNorm = 68.2765, GNorm = 0.5523, lr_0 = 2.6424e-04
Loss = 4.8419e-02, PNorm = 68.2801, GNorm = 0.5247, lr_0 = 2.6406e-04
Loss = 4.7403e-02, PNorm = 68.2848, GNorm = 0.4640, lr_0 = 2.6388e-04
Loss = 5.7715e-02, PNorm = 68.2926, GNorm = 0.7023, lr_0 = 2.6369e-04
Loss = 5.1546e-02, PNorm = 68.2977, GNorm = 0.5473, lr_0 = 2.6351e-04
Loss = 4.7999e-02, PNorm = 68.3037, GNorm = 0.5406, lr_0 = 2.6333e-04
Loss = 4.9118e-02, PNorm = 68.3097, GNorm = 0.4567, lr_0 = 2.6315e-04
Loss = 4.5830e-02, PNorm = 68.3145, GNorm = 0.6875, lr_0 = 2.6297e-04
Loss = 4.8658e-02, PNorm = 68.3186, GNorm = 0.6058, lr_0 = 2.6279e-04
Loss = 6.1744e-02, PNorm = 68.3218, GNorm = 0.6515, lr_0 = 2.6261e-04
Loss = 4.8224e-02, PNorm = 68.3277, GNorm = 0.5841, lr_0 = 2.6243e-04
Loss = 4.9471e-02, PNorm = 68.3326, GNorm = 0.5401, lr_0 = 2.6225e-04
Loss = 5.1882e-02, PNorm = 68.3373, GNorm = 0.6986, lr_0 = 2.6207e-04
Loss = 5.4429e-02, PNorm = 68.3441, GNorm = 0.7985, lr_0 = 2.6189e-04
Loss = 5.3933e-02, PNorm = 68.3497, GNorm = 0.5275, lr_0 = 2.6171e-04
Loss = 4.8553e-02, PNorm = 68.3551, GNorm = 0.5592, lr_0 = 2.6153e-04
Loss = 4.8442e-02, PNorm = 68.3592, GNorm = 0.5069, lr_0 = 2.6136e-04
Loss = 6.0326e-02, PNorm = 68.3615, GNorm = 0.6684, lr_0 = 2.6118e-04
Loss = 5.2535e-02, PNorm = 68.3670, GNorm = 0.7075, lr_0 = 2.6100e-04
Loss = 5.6741e-02, PNorm = 68.3706, GNorm = 0.5009, lr_0 = 2.6082e-04
Loss = 6.3024e-02, PNorm = 68.3731, GNorm = 0.4980, lr_0 = 2.6064e-04
Loss = 5.1305e-02, PNorm = 68.3782, GNorm = 0.4691, lr_0 = 2.6046e-04
Loss = 5.4779e-02, PNorm = 68.3833, GNorm = 0.6188, lr_0 = 2.6028e-04
Loss = 5.9544e-02, PNorm = 68.3892, GNorm = 0.6951, lr_0 = 2.6011e-04
Loss = 4.7525e-02, PNorm = 68.3928, GNorm = 0.5285, lr_0 = 2.5993e-04
Loss = 4.7164e-02, PNorm = 68.3965, GNorm = 0.5435, lr_0 = 2.5975e-04
Loss = 5.8521e-02, PNorm = 68.4024, GNorm = 0.4125, lr_0 = 2.5957e-04
Loss = 5.0045e-02, PNorm = 68.4064, GNorm = 0.4514, lr_0 = 2.5939e-04
Loss = 5.6885e-02, PNorm = 68.4105, GNorm = 0.5273, lr_0 = 2.5922e-04
Loss = 5.9039e-02, PNorm = 68.4148, GNorm = 0.8562, lr_0 = 2.5904e-04
Loss = 4.4943e-02, PNorm = 68.4199, GNorm = 0.5312, lr_0 = 2.5886e-04
Loss = 5.4735e-02, PNorm = 68.4214, GNorm = 0.6757, lr_0 = 2.5868e-04
Loss = 6.0599e-02, PNorm = 68.4256, GNorm = 0.4553, lr_0 = 2.5851e-04
Loss = 5.5424e-02, PNorm = 68.4295, GNorm = 0.5678, lr_0 = 2.5833e-04
Loss = 6.1071e-02, PNorm = 68.4343, GNorm = 0.6712, lr_0 = 2.5815e-04
Loss = 5.9822e-02, PNorm = 68.4417, GNorm = 0.7160, lr_0 = 2.5797e-04
Loss = 5.8997e-02, PNorm = 68.4462, GNorm = 0.6244, lr_0 = 2.5780e-04
Loss = 4.4544e-02, PNorm = 68.4497, GNorm = 0.4431, lr_0 = 2.5762e-04
Loss = 5.1427e-02, PNorm = 68.4515, GNorm = 0.4703, lr_0 = 2.5745e-04
Loss = 5.4187e-02, PNorm = 68.4545, GNorm = 0.4121, lr_0 = 2.5727e-04
Loss = 5.3221e-02, PNorm = 68.4588, GNorm = 0.8642, lr_0 = 2.5709e-04
Loss = 5.0495e-02, PNorm = 68.4626, GNorm = 0.5290, lr_0 = 2.5692e-04
Loss = 5.2181e-02, PNorm = 68.4663, GNorm = 0.5748, lr_0 = 2.5674e-04
Loss = 5.2174e-02, PNorm = 68.4687, GNorm = 0.5830, lr_0 = 2.5656e-04
Loss = 5.6615e-02, PNorm = 68.4732, GNorm = 0.7669, lr_0 = 2.5639e-04
Loss = 5.5311e-02, PNorm = 68.4773, GNorm = 0.5659, lr_0 = 2.5621e-04
Loss = 5.6006e-02, PNorm = 68.4826, GNorm = 0.6845, lr_0 = 2.5604e-04
Loss = 6.2186e-02, PNorm = 68.4898, GNorm = 0.5037, lr_0 = 2.5586e-04
Loss = 5.4363e-02, PNorm = 68.4937, GNorm = 0.5168, lr_0 = 2.5569e-04
Loss = 5.4762e-02, PNorm = 68.4978, GNorm = 0.5663, lr_0 = 2.5551e-04
Loss = 5.3843e-02, PNorm = 68.5027, GNorm = 0.5364, lr_0 = 2.5534e-04
Loss = 5.8286e-02, PNorm = 68.5072, GNorm = 0.5250, lr_0 = 2.5516e-04
Loss = 5.2096e-02, PNorm = 68.5106, GNorm = 0.4506, lr_0 = 2.5499e-04
Loss = 6.1902e-02, PNorm = 68.5130, GNorm = 0.4827, lr_0 = 2.5481e-04
Loss = 5.7860e-02, PNorm = 68.5186, GNorm = 0.6193, lr_0 = 2.5464e-04
Loss = 4.7328e-02, PNorm = 68.5224, GNorm = 0.4598, lr_0 = 2.5446e-04
Loss = 5.7382e-02, PNorm = 68.5263, GNorm = 0.9939, lr_0 = 2.5429e-04
Loss = 4.3113e-02, PNorm = 68.5310, GNorm = 0.3973, lr_0 = 2.5411e-04
Loss = 5.7649e-02, PNorm = 68.5342, GNorm = 0.7681, lr_0 = 2.5394e-04
Loss = 4.6297e-02, PNorm = 68.5370, GNorm = 0.5109, lr_0 = 2.5377e-04
Loss = 5.4844e-02, PNorm = 68.5402, GNorm = 0.4645, lr_0 = 2.5359e-04
Loss = 5.3883e-02, PNorm = 68.5452, GNorm = 0.5748, lr_0 = 2.5342e-04
Loss = 5.2247e-02, PNorm = 68.5492, GNorm = 0.6232, lr_0 = 2.5325e-04
Loss = 5.6104e-02, PNorm = 68.5535, GNorm = 1.2217, lr_0 = 2.5307e-04
Loss = 5.5976e-02, PNorm = 68.5578, GNorm = 0.5894, lr_0 = 2.5290e-04
Loss = 5.9752e-02, PNorm = 68.5631, GNorm = 0.8750, lr_0 = 2.5273e-04
Loss = 5.4848e-02, PNorm = 68.5660, GNorm = 0.6076, lr_0 = 2.5255e-04
Loss = 5.5646e-02, PNorm = 68.5686, GNorm = 0.4789, lr_0 = 2.5238e-04
Loss = 4.9813e-02, PNorm = 68.5729, GNorm = 0.6119, lr_0 = 2.5221e-04
Loss = 5.3276e-02, PNorm = 68.5806, GNorm = 0.6920, lr_0 = 2.5203e-04
Loss = 5.4146e-02, PNorm = 68.5860, GNorm = 0.4515, lr_0 = 2.5186e-04
Loss = 5.1230e-02, PNorm = 68.5883, GNorm = 0.4608, lr_0 = 2.5169e-04
Loss = 5.2677e-02, PNorm = 68.5907, GNorm = 0.7267, lr_0 = 2.5152e-04
Loss = 5.5387e-02, PNorm = 68.5932, GNorm = 0.5353, lr_0 = 2.5134e-04
Loss = 6.0848e-02, PNorm = 68.5955, GNorm = 0.9229, lr_0 = 2.5117e-04
Loss = 4.9882e-02, PNorm = 68.5988, GNorm = 0.6017, lr_0 = 2.5100e-04
Loss = 5.9838e-02, PNorm = 68.6031, GNorm = 0.6596, lr_0 = 2.5083e-04
Loss = 5.0029e-02, PNorm = 68.6085, GNorm = 0.4164, lr_0 = 2.5066e-04
Loss = 5.5939e-02, PNorm = 68.6135, GNorm = 0.6563, lr_0 = 2.5048e-04
Loss = 5.6157e-02, PNorm = 68.6169, GNorm = 0.7203, lr_0 = 2.5031e-04
Loss = 5.5320e-02, PNorm = 68.6188, GNorm = 0.5119, lr_0 = 2.5014e-04
Loss = 5.3285e-02, PNorm = 68.6238, GNorm = 0.9956, lr_0 = 2.4997e-04
Loss = 5.2191e-02, PNorm = 68.6287, GNorm = 0.7895, lr_0 = 2.4980e-04
Loss = 4.7637e-02, PNorm = 68.6332, GNorm = 0.5019, lr_0 = 2.4963e-04
Loss = 5.2403e-02, PNorm = 68.6386, GNorm = 0.4825, lr_0 = 2.4946e-04
Loss = 5.9987e-02, PNorm = 68.6445, GNorm = 0.4493, lr_0 = 2.4929e-04
Loss = 5.3610e-02, PNorm = 68.6486, GNorm = 0.5922, lr_0 = 2.4911e-04
Loss = 5.4385e-02, PNorm = 68.6541, GNorm = 0.5808, lr_0 = 2.4894e-04
Loss = 6.0712e-02, PNorm = 68.6561, GNorm = 0.4959, lr_0 = 2.4877e-04
Loss = 4.4328e-02, PNorm = 68.6576, GNorm = 0.6059, lr_0 = 2.4860e-04
Loss = 5.9057e-02, PNorm = 68.6634, GNorm = 0.6062, lr_0 = 2.4843e-04
Loss = 4.0031e-02, PNorm = 68.6681, GNorm = 0.6921, lr_0 = 2.4826e-04
Loss = 5.7189e-02, PNorm = 68.6705, GNorm = 0.7184, lr_0 = 2.4809e-04
Loss = 5.5471e-02, PNorm = 68.6754, GNorm = 0.6487, lr_0 = 2.4792e-04
Loss = 5.5747e-02, PNorm = 68.6812, GNorm = 0.7693, lr_0 = 2.4775e-04
Loss = 5.4548e-02, PNorm = 68.6875, GNorm = 0.6254, lr_0 = 2.4758e-04
Loss = 4.7834e-02, PNorm = 68.6934, GNorm = 0.4996, lr_0 = 2.4741e-04
Loss = 5.4386e-02, PNorm = 68.6964, GNorm = 0.7051, lr_0 = 2.4724e-04
Loss = 5.3899e-02, PNorm = 68.6975, GNorm = 0.4101, lr_0 = 2.4707e-04
Validation mae = 0.387045
Epoch 19
Loss = 5.1319e-02, PNorm = 68.7013, GNorm = 0.4468, lr_0 = 2.4690e-04
Loss = 3.8743e-02, PNorm = 68.7063, GNorm = 0.4511, lr_0 = 2.4674e-04
Loss = 4.2768e-02, PNorm = 68.7094, GNorm = 0.7582, lr_0 = 2.4657e-04
Loss = 5.4842e-02, PNorm = 68.7144, GNorm = 0.8436, lr_0 = 2.4640e-04
Loss = 5.8684e-02, PNorm = 68.7201, GNorm = 0.5342, lr_0 = 2.4623e-04
Loss = 4.4272e-02, PNorm = 68.7259, GNorm = 0.6715, lr_0 = 2.4606e-04
Loss = 5.1343e-02, PNorm = 68.7307, GNorm = 0.3697, lr_0 = 2.4589e-04
Loss = 4.5666e-02, PNorm = 68.7348, GNorm = 0.5576, lr_0 = 2.4572e-04
Loss = 4.4410e-02, PNorm = 68.7395, GNorm = 0.6746, lr_0 = 2.4556e-04
Loss = 4.6636e-02, PNorm = 68.7428, GNorm = 0.3660, lr_0 = 2.4539e-04
Loss = 4.8417e-02, PNorm = 68.7464, GNorm = 0.5343, lr_0 = 2.4522e-04
Loss = 4.6677e-02, PNorm = 68.7517, GNorm = 0.9377, lr_0 = 2.4505e-04
Loss = 4.5795e-02, PNorm = 68.7563, GNorm = 0.6522, lr_0 = 2.4488e-04
Loss = 5.5429e-02, PNorm = 68.7620, GNorm = 0.5561, lr_0 = 2.4472e-04
Loss = 4.7614e-02, PNorm = 68.7677, GNorm = 0.5323, lr_0 = 2.4455e-04
Loss = 5.4761e-02, PNorm = 68.7712, GNorm = 0.6360, lr_0 = 2.4438e-04
Loss = 5.0350e-02, PNorm = 68.7765, GNorm = 0.5535, lr_0 = 2.4421e-04
Loss = 5.6701e-02, PNorm = 68.7815, GNorm = 0.5012, lr_0 = 2.4405e-04
Loss = 5.0473e-02, PNorm = 68.7875, GNorm = 0.7508, lr_0 = 2.4388e-04
Loss = 4.3659e-02, PNorm = 68.7925, GNorm = 0.4924, lr_0 = 2.4371e-04
Loss = 5.0195e-02, PNorm = 68.7956, GNorm = 0.6959, lr_0 = 2.4354e-04
Loss = 5.0336e-02, PNorm = 68.8006, GNorm = 1.0016, lr_0 = 2.4338e-04
Loss = 4.7877e-02, PNorm = 68.8059, GNorm = 0.6087, lr_0 = 2.4321e-04
Loss = 5.1746e-02, PNorm = 68.8122, GNorm = 0.5557, lr_0 = 2.4304e-04
Loss = 4.9693e-02, PNorm = 68.8161, GNorm = 0.6444, lr_0 = 2.4288e-04
Loss = 5.4428e-02, PNorm = 68.8205, GNorm = 0.4449, lr_0 = 2.4271e-04
Loss = 4.5125e-02, PNorm = 68.8256, GNorm = 0.5165, lr_0 = 2.4254e-04
Loss = 4.3664e-02, PNorm = 68.8305, GNorm = 0.6617, lr_0 = 2.4238e-04
Loss = 4.5656e-02, PNorm = 68.8333, GNorm = 0.5267, lr_0 = 2.4221e-04
Loss = 5.1622e-02, PNorm = 68.8376, GNorm = 0.5388, lr_0 = 2.4205e-04
Loss = 4.7598e-02, PNorm = 68.8431, GNorm = 0.5956, lr_0 = 2.4188e-04
Loss = 4.1877e-02, PNorm = 68.8458, GNorm = 0.4814, lr_0 = 2.4171e-04
Loss = 4.9311e-02, PNorm = 68.8483, GNorm = 0.8825, lr_0 = 2.4155e-04
Loss = 5.3666e-02, PNorm = 68.8540, GNorm = 0.7068, lr_0 = 2.4138e-04
Loss = 5.0526e-02, PNorm = 68.8585, GNorm = 0.5197, lr_0 = 2.4122e-04
Loss = 5.2949e-02, PNorm = 68.8621, GNorm = 0.6190, lr_0 = 2.4105e-04
Loss = 4.7297e-02, PNorm = 68.8666, GNorm = 0.5630, lr_0 = 2.4089e-04
Loss = 4.9531e-02, PNorm = 68.8712, GNorm = 0.4480, lr_0 = 2.4072e-04
Loss = 5.4471e-02, PNorm = 68.8747, GNorm = 0.6053, lr_0 = 2.4056e-04
Loss = 5.4595e-02, PNorm = 68.8776, GNorm = 0.4632, lr_0 = 2.4039e-04
Loss = 4.6357e-02, PNorm = 68.8827, GNorm = 0.4838, lr_0 = 2.4023e-04
Loss = 5.4198e-02, PNorm = 68.8871, GNorm = 0.4379, lr_0 = 2.4006e-04
Loss = 4.8764e-02, PNorm = 68.8922, GNorm = 0.4706, lr_0 = 2.3990e-04
Loss = 5.7550e-02, PNorm = 68.8991, GNorm = 0.7068, lr_0 = 2.3974e-04
Loss = 5.3924e-02, PNorm = 68.9046, GNorm = 0.8784, lr_0 = 2.3957e-04
Loss = 4.7517e-02, PNorm = 68.9107, GNorm = 0.7661, lr_0 = 2.3941e-04
Loss = 5.6909e-02, PNorm = 68.9170, GNorm = 0.5269, lr_0 = 2.3924e-04
Loss = 5.0815e-02, PNorm = 68.9212, GNorm = 0.4435, lr_0 = 2.3908e-04
Loss = 4.9943e-02, PNorm = 68.9233, GNorm = 0.5386, lr_0 = 2.3892e-04
Loss = 5.4606e-02, PNorm = 68.9258, GNorm = 0.7764, lr_0 = 2.3875e-04
Loss = 4.5831e-02, PNorm = 68.9297, GNorm = 0.5119, lr_0 = 2.3859e-04
Loss = 4.8950e-02, PNorm = 68.9350, GNorm = 0.4062, lr_0 = 2.3842e-04
Loss = 4.3959e-02, PNorm = 68.9396, GNorm = 0.5249, lr_0 = 2.3826e-04
Loss = 4.7071e-02, PNorm = 68.9437, GNorm = 0.5135, lr_0 = 2.3810e-04
Loss = 4.8614e-02, PNorm = 68.9475, GNorm = 0.7671, lr_0 = 2.3794e-04
Loss = 4.6773e-02, PNorm = 68.9501, GNorm = 0.5417, lr_0 = 2.3777e-04
Loss = 5.0292e-02, PNorm = 68.9531, GNorm = 0.6127, lr_0 = 2.3761e-04
Loss = 4.6474e-02, PNorm = 68.9560, GNorm = 0.5338, lr_0 = 2.3745e-04
Loss = 4.9423e-02, PNorm = 68.9582, GNorm = 0.5535, lr_0 = 2.3728e-04
Loss = 5.4286e-02, PNorm = 68.9613, GNorm = 0.9243, lr_0 = 2.3712e-04
Loss = 4.9067e-02, PNorm = 68.9661, GNorm = 0.4799, lr_0 = 2.3696e-04
Loss = 5.5948e-02, PNorm = 68.9708, GNorm = 0.3740, lr_0 = 2.3680e-04
Loss = 5.9287e-02, PNorm = 68.9759, GNorm = 0.6655, lr_0 = 2.3663e-04
Loss = 5.1042e-02, PNorm = 68.9801, GNorm = 0.7833, lr_0 = 2.3647e-04
Loss = 5.1760e-02, PNorm = 68.9854, GNorm = 0.6715, lr_0 = 2.3631e-04
Loss = 5.2896e-02, PNorm = 68.9906, GNorm = 0.5632, lr_0 = 2.3615e-04
Loss = 4.7436e-02, PNorm = 68.9958, GNorm = 0.6653, lr_0 = 2.3599e-04
Loss = 4.7648e-02, PNorm = 69.0009, GNorm = 0.5230, lr_0 = 2.3582e-04
Loss = 5.1813e-02, PNorm = 69.0054, GNorm = 0.6794, lr_0 = 2.3566e-04
Loss = 4.6372e-02, PNorm = 69.0094, GNorm = 0.4725, lr_0 = 2.3550e-04
Loss = 4.4779e-02, PNorm = 69.0137, GNorm = 0.4759, lr_0 = 2.3534e-04
Loss = 4.7076e-02, PNorm = 69.0183, GNorm = 0.4334, lr_0 = 2.3518e-04
Loss = 4.8430e-02, PNorm = 69.0219, GNorm = 0.6579, lr_0 = 2.3502e-04
Loss = 5.4026e-02, PNorm = 69.0244, GNorm = 0.4775, lr_0 = 2.3486e-04
Loss = 4.8152e-02, PNorm = 69.0281, GNorm = 0.7033, lr_0 = 2.3470e-04
Loss = 5.3127e-02, PNorm = 69.0332, GNorm = 0.7252, lr_0 = 2.3454e-04
Loss = 4.8764e-02, PNorm = 69.0358, GNorm = 0.5020, lr_0 = 2.3437e-04
Loss = 4.9038e-02, PNorm = 69.0392, GNorm = 0.5853, lr_0 = 2.3421e-04
Loss = 5.6247e-02, PNorm = 69.0447, GNorm = 0.6991, lr_0 = 2.3405e-04
Loss = 6.2671e-02, PNorm = 69.0482, GNorm = 0.4198, lr_0 = 2.3389e-04
Loss = 5.0291e-02, PNorm = 69.0511, GNorm = 0.5736, lr_0 = 2.3373e-04
Loss = 5.8217e-02, PNorm = 69.0553, GNorm = 0.5404, lr_0 = 2.3357e-04
Loss = 5.2607e-02, PNorm = 69.0589, GNorm = 0.5610, lr_0 = 2.3341e-04
Loss = 5.0184e-02, PNorm = 69.0635, GNorm = 0.6146, lr_0 = 2.3325e-04
Loss = 4.9185e-02, PNorm = 69.0675, GNorm = 0.5500, lr_0 = 2.3309e-04
Loss = 4.6704e-02, PNorm = 69.0706, GNorm = 0.7580, lr_0 = 2.3293e-04
Loss = 4.9743e-02, PNorm = 69.0742, GNorm = 0.6152, lr_0 = 2.3277e-04
Loss = 4.8014e-02, PNorm = 69.0765, GNorm = 0.5008, lr_0 = 2.3261e-04
Loss = 4.7821e-02, PNorm = 69.0805, GNorm = 0.5103, lr_0 = 2.3246e-04
Loss = 4.8613e-02, PNorm = 69.0847, GNorm = 0.4797, lr_0 = 2.3230e-04
Loss = 4.3507e-02, PNorm = 69.0873, GNorm = 0.4112, lr_0 = 2.3214e-04
Loss = 4.6509e-02, PNorm = 69.0894, GNorm = 0.6984, lr_0 = 2.3198e-04
Loss = 5.0036e-02, PNorm = 69.0918, GNorm = 0.3947, lr_0 = 2.3182e-04
Loss = 4.8306e-02, PNorm = 69.0957, GNorm = 0.5642, lr_0 = 2.3166e-04
Loss = 4.9611e-02, PNorm = 69.0989, GNorm = 0.7762, lr_0 = 2.3150e-04
Loss = 5.1999e-02, PNorm = 69.1033, GNorm = 0.7003, lr_0 = 2.3134e-04
Loss = 4.7582e-02, PNorm = 69.1093, GNorm = 1.0739, lr_0 = 2.3118e-04
Loss = 5.0034e-02, PNorm = 69.1122, GNorm = 0.6092, lr_0 = 2.3103e-04
Loss = 5.1250e-02, PNorm = 69.1177, GNorm = 0.5007, lr_0 = 2.3087e-04
Loss = 4.5353e-02, PNorm = 69.1218, GNorm = 0.5151, lr_0 = 2.3071e-04
Loss = 5.0874e-02, PNorm = 69.1255, GNorm = 0.7915, lr_0 = 2.3055e-04
Loss = 5.6095e-02, PNorm = 69.1298, GNorm = 0.5523, lr_0 = 2.3039e-04
Loss = 5.7200e-02, PNorm = 69.1353, GNorm = 0.6796, lr_0 = 2.3024e-04
Loss = 4.9251e-02, PNorm = 69.1377, GNorm = 0.7223, lr_0 = 2.3008e-04
Loss = 5.9038e-02, PNorm = 69.1387, GNorm = 0.8534, lr_0 = 2.2992e-04
Loss = 4.6419e-02, PNorm = 69.1426, GNorm = 0.4794, lr_0 = 2.2976e-04
Loss = 5.1349e-02, PNorm = 69.1463, GNorm = 0.6879, lr_0 = 2.2961e-04
Loss = 5.6673e-02, PNorm = 69.1503, GNorm = 0.8436, lr_0 = 2.2945e-04
Loss = 5.1256e-02, PNorm = 69.1539, GNorm = 0.5989, lr_0 = 2.2929e-04
Loss = 4.5601e-02, PNorm = 69.1566, GNorm = 0.8700, lr_0 = 2.2913e-04
Loss = 4.5798e-02, PNorm = 69.1592, GNorm = 0.7680, lr_0 = 2.2898e-04
Loss = 4.9833e-02, PNorm = 69.1631, GNorm = 0.5107, lr_0 = 2.2882e-04
Loss = 5.1268e-02, PNorm = 69.1654, GNorm = 0.3890, lr_0 = 2.2866e-04
Loss = 5.1058e-02, PNorm = 69.1694, GNorm = 0.4038, lr_0 = 2.2851e-04
Loss = 4.4131e-02, PNorm = 69.1736, GNorm = 0.5213, lr_0 = 2.2835e-04
Loss = 5.3145e-02, PNorm = 69.1763, GNorm = 0.4406, lr_0 = 2.2819e-04
Loss = 5.8004e-02, PNorm = 69.1793, GNorm = 0.5548, lr_0 = 2.2804e-04
Loss = 5.4731e-02, PNorm = 69.1833, GNorm = 0.5298, lr_0 = 2.2788e-04
Loss = 5.1767e-02, PNorm = 69.1863, GNorm = 0.7541, lr_0 = 2.2773e-04
Loss = 4.7708e-02, PNorm = 69.1894, GNorm = 0.4450, lr_0 = 2.2757e-04
Validation mae = 0.387208
Epoch 20
Loss = 4.1912e-02, PNorm = 69.1943, GNorm = 0.4797, lr_0 = 2.2741e-04
Loss = 5.0338e-02, PNorm = 69.1996, GNorm = 0.5516, lr_0 = 2.2726e-04
Loss = 4.5209e-02, PNorm = 69.2051, GNorm = 0.5579, lr_0 = 2.2710e-04
Loss = 4.9437e-02, PNorm = 69.2086, GNorm = 0.6592, lr_0 = 2.2695e-04
Loss = 4.8740e-02, PNorm = 69.2136, GNorm = 0.3923, lr_0 = 2.2679e-04
Loss = 5.0361e-02, PNorm = 69.2192, GNorm = 0.5804, lr_0 = 2.2664e-04
Loss = 4.2097e-02, PNorm = 69.2249, GNorm = 0.5237, lr_0 = 2.2648e-04
Loss = 3.9458e-02, PNorm = 69.2299, GNorm = 0.3998, lr_0 = 2.2632e-04
Loss = 4.5035e-02, PNorm = 69.2349, GNorm = 0.4630, lr_0 = 2.2617e-04
Loss = 3.8743e-02, PNorm = 69.2395, GNorm = 0.4358, lr_0 = 2.2601e-04
Loss = 4.5763e-02, PNorm = 69.2438, GNorm = 0.3691, lr_0 = 2.2586e-04
Loss = 3.5782e-02, PNorm = 69.2488, GNorm = 0.6049, lr_0 = 2.2571e-04
Loss = 4.5102e-02, PNorm = 69.2524, GNorm = 0.4296, lr_0 = 2.2555e-04
Loss = 4.3932e-02, PNorm = 69.2547, GNorm = 0.5241, lr_0 = 2.2540e-04
Loss = 3.8575e-02, PNorm = 69.2589, GNorm = 0.4117, lr_0 = 2.2524e-04
Loss = 4.3116e-02, PNorm = 69.2622, GNorm = 0.5182, lr_0 = 2.2509e-04
Loss = 4.7377e-02, PNorm = 69.2657, GNorm = 0.5832, lr_0 = 2.2493e-04
Loss = 4.1808e-02, PNorm = 69.2690, GNorm = 0.6384, lr_0 = 2.2478e-04
Loss = 4.8404e-02, PNorm = 69.2732, GNorm = 0.8004, lr_0 = 2.2463e-04
Loss = 3.9709e-02, PNorm = 69.2769, GNorm = 0.4485, lr_0 = 2.2447e-04
Loss = 4.4583e-02, PNorm = 69.2797, GNorm = 0.7476, lr_0 = 2.2432e-04
Loss = 4.2369e-02, PNorm = 69.2817, GNorm = 0.6338, lr_0 = 2.2416e-04
Loss = 3.9303e-02, PNorm = 69.2848, GNorm = 0.6874, lr_0 = 2.2401e-04
Loss = 5.5154e-02, PNorm = 69.2896, GNorm = 0.6304, lr_0 = 2.2386e-04
Loss = 4.6031e-02, PNorm = 69.2945, GNorm = 0.4962, lr_0 = 2.2370e-04
Loss = 4.4323e-02, PNorm = 69.2971, GNorm = 0.6286, lr_0 = 2.2355e-04
Loss = 4.6918e-02, PNorm = 69.2997, GNorm = 0.4510, lr_0 = 2.2340e-04
Loss = 4.3557e-02, PNorm = 69.3035, GNorm = 0.6665, lr_0 = 2.2324e-04
Loss = 4.7743e-02, PNorm = 69.3079, GNorm = 0.7593, lr_0 = 2.2309e-04
Loss = 5.1530e-02, PNorm = 69.3126, GNorm = 0.6537, lr_0 = 2.2294e-04
Loss = 4.2009e-02, PNorm = 69.3155, GNorm = 0.5826, lr_0 = 2.2279e-04
Loss = 4.2662e-02, PNorm = 69.3174, GNorm = 0.4965, lr_0 = 2.2263e-04
Loss = 4.7640e-02, PNorm = 69.3199, GNorm = 0.5303, lr_0 = 2.2248e-04
Loss = 4.7238e-02, PNorm = 69.3245, GNorm = 0.4203, lr_0 = 2.2233e-04
Loss = 4.5740e-02, PNorm = 69.3293, GNorm = 0.6540, lr_0 = 2.2218e-04
Loss = 4.3183e-02, PNorm = 69.3336, GNorm = 0.4394, lr_0 = 2.2202e-04
Loss = 4.6575e-02, PNorm = 69.3367, GNorm = 0.5166, lr_0 = 2.2187e-04
Loss = 4.2940e-02, PNorm = 69.3394, GNorm = 0.4191, lr_0 = 2.2172e-04
Loss = 5.5640e-02, PNorm = 69.3446, GNorm = 0.6382, lr_0 = 2.2157e-04
Loss = 5.1133e-02, PNorm = 69.3490, GNorm = 0.5948, lr_0 = 2.2142e-04
Loss = 4.9699e-02, PNorm = 69.3546, GNorm = 0.7546, lr_0 = 2.2126e-04
Loss = 5.6620e-02, PNorm = 69.3591, GNorm = 0.7701, lr_0 = 2.2111e-04
Loss = 4.4266e-02, PNorm = 69.3624, GNorm = 0.3536, lr_0 = 2.2096e-04
Loss = 4.4654e-02, PNorm = 69.3649, GNorm = 0.7231, lr_0 = 2.2081e-04
Loss = 4.2825e-02, PNorm = 69.3689, GNorm = 0.5702, lr_0 = 2.2066e-04
Loss = 4.4073e-02, PNorm = 69.3726, GNorm = 0.5007, lr_0 = 2.2051e-04
Loss = 4.6942e-02, PNorm = 69.3763, GNorm = 0.5288, lr_0 = 2.2036e-04
Loss = 4.2548e-02, PNorm = 69.3799, GNorm = 0.5822, lr_0 = 2.2021e-04
Loss = 4.5846e-02, PNorm = 69.3835, GNorm = 0.5675, lr_0 = 2.2005e-04
Loss = 4.9591e-02, PNorm = 69.3880, GNorm = 0.8145, lr_0 = 2.1990e-04
Loss = 5.1916e-02, PNorm = 69.3925, GNorm = 0.8051, lr_0 = 2.1975e-04
Loss = 4.5936e-02, PNorm = 69.3968, GNorm = 0.6148, lr_0 = 2.1960e-04
Loss = 4.6105e-02, PNorm = 69.4006, GNorm = 0.5142, lr_0 = 2.1945e-04
Loss = 4.5134e-02, PNorm = 69.4037, GNorm = 0.5828, lr_0 = 2.1930e-04
Loss = 4.6266e-02, PNorm = 69.4068, GNorm = 0.4838, lr_0 = 2.1915e-04
Loss = 5.0414e-02, PNorm = 69.4098, GNorm = 0.6010, lr_0 = 2.1900e-04
Loss = 4.2034e-02, PNorm = 69.4118, GNorm = 0.6158, lr_0 = 2.1885e-04
Loss = 4.8739e-02, PNorm = 69.4167, GNorm = 0.7704, lr_0 = 2.1870e-04
Loss = 5.5248e-02, PNorm = 69.4203, GNorm = 0.9173, lr_0 = 2.1855e-04
Loss = 4.1139e-02, PNorm = 69.4211, GNorm = 0.5244, lr_0 = 2.1840e-04
Loss = 4.9374e-02, PNorm = 69.4231, GNorm = 0.5181, lr_0 = 2.1825e-04
Loss = 4.2304e-02, PNorm = 69.4257, GNorm = 0.6123, lr_0 = 2.1810e-04
Loss = 5.0138e-02, PNorm = 69.4298, GNorm = 0.5800, lr_0 = 2.1795e-04
Loss = 4.7549e-02, PNorm = 69.4328, GNorm = 0.8864, lr_0 = 2.1780e-04
Loss = 4.4165e-02, PNorm = 69.4365, GNorm = 0.4536, lr_0 = 2.1765e-04
Loss = 4.2874e-02, PNorm = 69.4411, GNorm = 0.5845, lr_0 = 2.1751e-04
Loss = 4.3643e-02, PNorm = 69.4470, GNorm = 0.3776, lr_0 = 2.1736e-04
Loss = 4.5579e-02, PNorm = 69.4524, GNorm = 0.6353, lr_0 = 2.1721e-04
Loss = 4.6650e-02, PNorm = 69.4565, GNorm = 0.4162, lr_0 = 2.1706e-04
Loss = 4.9869e-02, PNorm = 69.4600, GNorm = 0.4807, lr_0 = 2.1691e-04
Loss = 4.3510e-02, PNorm = 69.4638, GNorm = 0.3558, lr_0 = 2.1676e-04
Loss = 5.0804e-02, PNorm = 69.4688, GNorm = 0.5888, lr_0 = 2.1661e-04
Loss = 4.6724e-02, PNorm = 69.4721, GNorm = 0.6041, lr_0 = 2.1646e-04
Loss = 4.4584e-02, PNorm = 69.4750, GNorm = 0.6025, lr_0 = 2.1632e-04
Loss = 5.0076e-02, PNorm = 69.4781, GNorm = 0.7378, lr_0 = 2.1617e-04
Loss = 5.4852e-02, PNorm = 69.4835, GNorm = 0.6880, lr_0 = 2.1602e-04
Loss = 4.9854e-02, PNorm = 69.4855, GNorm = 0.8322, lr_0 = 2.1587e-04
Loss = 4.5044e-02, PNorm = 69.4877, GNorm = 0.5703, lr_0 = 2.1572e-04
Loss = 4.9664e-02, PNorm = 69.4923, GNorm = 0.7885, lr_0 = 2.1558e-04
Loss = 3.9488e-02, PNorm = 69.4957, GNorm = 0.4546, lr_0 = 2.1543e-04
Loss = 4.4045e-02, PNorm = 69.4993, GNorm = 0.5916, lr_0 = 2.1528e-04
Loss = 4.8897e-02, PNorm = 69.5026, GNorm = 0.6372, lr_0 = 2.1513e-04
Loss = 5.3919e-02, PNorm = 69.5070, GNorm = 0.8424, lr_0 = 2.1499e-04
Loss = 5.4445e-02, PNorm = 69.5102, GNorm = 0.6054, lr_0 = 2.1484e-04
Loss = 5.0099e-02, PNorm = 69.5133, GNorm = 0.6826, lr_0 = 2.1469e-04
Loss = 4.7764e-02, PNorm = 69.5176, GNorm = 0.6923, lr_0 = 2.1454e-04
Loss = 5.4007e-02, PNorm = 69.5213, GNorm = 0.4509, lr_0 = 2.1440e-04
Loss = 4.5795e-02, PNorm = 69.5248, GNorm = 0.4803, lr_0 = 2.1425e-04
Loss = 4.2569e-02, PNorm = 69.5291, GNorm = 0.3313, lr_0 = 2.1410e-04
Loss = 4.5317e-02, PNorm = 69.5334, GNorm = 0.5641, lr_0 = 2.1396e-04
Loss = 5.0731e-02, PNorm = 69.5365, GNorm = 0.6481, lr_0 = 2.1381e-04
Loss = 4.7736e-02, PNorm = 69.5404, GNorm = 0.6520, lr_0 = 2.1366e-04
Loss = 4.4232e-02, PNorm = 69.5418, GNorm = 0.5254, lr_0 = 2.1352e-04
Loss = 4.7140e-02, PNorm = 69.5443, GNorm = 0.3318, lr_0 = 2.1337e-04
Loss = 4.4847e-02, PNorm = 69.5465, GNorm = 0.4757, lr_0 = 2.1323e-04
Loss = 5.2700e-02, PNorm = 69.5488, GNorm = 0.4901, lr_0 = 2.1308e-04
Loss = 4.6581e-02, PNorm = 69.5508, GNorm = 0.8863, lr_0 = 2.1293e-04
Loss = 6.0751e-02, PNorm = 69.5547, GNorm = 1.0111, lr_0 = 2.1279e-04
Loss = 4.5517e-02, PNorm = 69.5606, GNorm = 0.3682, lr_0 = 2.1264e-04
Loss = 4.8799e-02, PNorm = 69.5644, GNorm = 0.4477, lr_0 = 2.1250e-04
Loss = 5.2975e-02, PNorm = 69.5675, GNorm = 0.4784, lr_0 = 2.1235e-04
Loss = 5.0952e-02, PNorm = 69.5704, GNorm = 0.6709, lr_0 = 2.1221e-04
Loss = 5.3800e-02, PNorm = 69.5765, GNorm = 0.3939, lr_0 = 2.1206e-04
Loss = 6.2037e-02, PNorm = 69.5821, GNorm = 0.5607, lr_0 = 2.1191e-04
Loss = 4.8931e-02, PNorm = 69.5851, GNorm = 0.5488, lr_0 = 2.1177e-04
Loss = 4.3776e-02, PNorm = 69.5877, GNorm = 0.5021, lr_0 = 2.1162e-04
Loss = 4.9847e-02, PNorm = 69.5912, GNorm = 0.9535, lr_0 = 2.1148e-04
Loss = 4.0988e-02, PNorm = 69.5941, GNorm = 0.6550, lr_0 = 2.1133e-04
Loss = 4.3443e-02, PNorm = 69.5971, GNorm = 0.5290, lr_0 = 2.1119e-04
Loss = 4.3207e-02, PNorm = 69.6015, GNorm = 0.6043, lr_0 = 2.1104e-04
Loss = 4.5721e-02, PNorm = 69.6051, GNorm = 0.4518, lr_0 = 2.1090e-04
Loss = 5.8851e-02, PNorm = 69.6087, GNorm = 0.5384, lr_0 = 2.1076e-04
Loss = 5.5957e-02, PNorm = 69.6132, GNorm = 0.9976, lr_0 = 2.1061e-04
Loss = 5.3111e-02, PNorm = 69.6162, GNorm = 0.6286, lr_0 = 2.1047e-04
Loss = 4.6904e-02, PNorm = 69.6197, GNorm = 0.5085, lr_0 = 2.1032e-04
Loss = 4.8763e-02, PNorm = 69.6228, GNorm = 0.4988, lr_0 = 2.1018e-04
Loss = 5.0960e-02, PNorm = 69.6258, GNorm = 0.5598, lr_0 = 2.1003e-04
Loss = 3.8248e-02, PNorm = 69.6296, GNorm = 0.5168, lr_0 = 2.0989e-04
Loss = 5.4063e-02, PNorm = 69.6327, GNorm = 0.5970, lr_0 = 2.0975e-04
Loss = 4.7222e-02, PNorm = 69.6353, GNorm = 0.7353, lr_0 = 2.0960e-04
Validation mae = 0.391301
Epoch 21
Loss = 3.8494e-02, PNorm = 69.6405, GNorm = 0.4770, lr_0 = 2.0946e-04
Loss = 3.3769e-02, PNorm = 69.6455, GNorm = 0.3545, lr_0 = 2.0932e-04
Loss = 4.0475e-02, PNorm = 69.6479, GNorm = 0.7049, lr_0 = 2.0917e-04
Loss = 3.7628e-02, PNorm = 69.6508, GNorm = 0.4241, lr_0 = 2.0903e-04
Loss = 3.6213e-02, PNorm = 69.6546, GNorm = 0.3709, lr_0 = 2.0889e-04
Loss = 4.1177e-02, PNorm = 69.6590, GNorm = 0.4547, lr_0 = 2.0874e-04
Loss = 4.1367e-02, PNorm = 69.6618, GNorm = 0.4937, lr_0 = 2.0860e-04
Loss = 4.5898e-02, PNorm = 69.6632, GNorm = 0.4969, lr_0 = 2.0846e-04
Loss = 5.0161e-02, PNorm = 69.6664, GNorm = 0.5484, lr_0 = 2.0831e-04
Loss = 3.8747e-02, PNorm = 69.6712, GNorm = 0.4811, lr_0 = 2.0817e-04
Loss = 3.6083e-02, PNorm = 69.6753, GNorm = 0.4814, lr_0 = 2.0803e-04
Loss = 3.9740e-02, PNorm = 69.6802, GNorm = 0.4142, lr_0 = 2.0789e-04
Loss = 4.6515e-02, PNorm = 69.6837, GNorm = 0.6093, lr_0 = 2.0774e-04
Loss = 4.6213e-02, PNorm = 69.6892, GNorm = 0.5811, lr_0 = 2.0760e-04
Loss = 4.1809e-02, PNorm = 69.6954, GNorm = 0.4831, lr_0 = 2.0746e-04
Loss = 4.0896e-02, PNorm = 69.6999, GNorm = 0.5417, lr_0 = 2.0732e-04
Loss = 4.4225e-02, PNorm = 69.7026, GNorm = 0.9154, lr_0 = 2.0718e-04
Loss = 4.9794e-02, PNorm = 69.7055, GNorm = 0.4425, lr_0 = 2.0703e-04
Loss = 3.8082e-02, PNorm = 69.7099, GNorm = 0.4891, lr_0 = 2.0689e-04
Loss = 4.1659e-02, PNorm = 69.7129, GNorm = 0.4411, lr_0 = 2.0675e-04
Loss = 4.2791e-02, PNorm = 69.7160, GNorm = 0.4624, lr_0 = 2.0661e-04
Loss = 3.5869e-02, PNorm = 69.7181, GNorm = 0.4191, lr_0 = 2.0647e-04
Loss = 3.7064e-02, PNorm = 69.7207, GNorm = 0.3489, lr_0 = 2.0633e-04
Loss = 4.0530e-02, PNorm = 69.7235, GNorm = 0.3191, lr_0 = 2.0618e-04
Loss = 4.2270e-02, PNorm = 69.7265, GNorm = 0.4280, lr_0 = 2.0604e-04
Loss = 3.8822e-02, PNorm = 69.7294, GNorm = 0.4040, lr_0 = 2.0590e-04
Loss = 4.6285e-02, PNorm = 69.7322, GNorm = 0.6671, lr_0 = 2.0576e-04
Loss = 3.8891e-02, PNorm = 69.7349, GNorm = 0.5972, lr_0 = 2.0562e-04
Loss = 4.8642e-02, PNorm = 69.7375, GNorm = 0.3520, lr_0 = 2.0548e-04
Loss = 3.5861e-02, PNorm = 69.7411, GNorm = 0.7171, lr_0 = 2.0534e-04
Loss = 3.8445e-02, PNorm = 69.7451, GNorm = 0.3705, lr_0 = 2.0520e-04
Loss = 4.6220e-02, PNorm = 69.7475, GNorm = 0.5699, lr_0 = 2.0506e-04
Loss = 4.9911e-02, PNorm = 69.7514, GNorm = 0.5252, lr_0 = 2.0492e-04
Loss = 3.8766e-02, PNorm = 69.7553, GNorm = 0.5352, lr_0 = 2.0478e-04
Loss = 4.6750e-02, PNorm = 69.7593, GNorm = 0.4733, lr_0 = 2.0464e-04
Loss = 3.9221e-02, PNorm = 69.7625, GNorm = 0.5509, lr_0 = 2.0450e-04
Loss = 4.3424e-02, PNorm = 69.7651, GNorm = 0.6846, lr_0 = 2.0436e-04
Loss = 4.4175e-02, PNorm = 69.7698, GNorm = 0.7186, lr_0 = 2.0422e-04
Loss = 4.0450e-02, PNorm = 69.7746, GNorm = 0.5388, lr_0 = 2.0408e-04
Loss = 4.3436e-02, PNorm = 69.7778, GNorm = 0.3561, lr_0 = 2.0394e-04
Loss = 4.0627e-02, PNorm = 69.7813, GNorm = 0.5944, lr_0 = 2.0380e-04
Loss = 4.5184e-02, PNorm = 69.7832, GNorm = 0.4631, lr_0 = 2.0366e-04
Loss = 4.3068e-02, PNorm = 69.7870, GNorm = 0.5703, lr_0 = 2.0352e-04
Loss = 4.0211e-02, PNorm = 69.7928, GNorm = 0.4542, lr_0 = 2.0338e-04
Loss = 4.1163e-02, PNorm = 69.7958, GNorm = 0.5975, lr_0 = 2.0324e-04
Loss = 4.3966e-02, PNorm = 69.7988, GNorm = 0.4730, lr_0 = 2.0310e-04
Loss = 3.5747e-02, PNorm = 69.8010, GNorm = 0.5766, lr_0 = 2.0296e-04
Loss = 4.8269e-02, PNorm = 69.8048, GNorm = 0.5219, lr_0 = 2.0282e-04
Loss = 5.4915e-02, PNorm = 69.8091, GNorm = 0.7803, lr_0 = 2.0268e-04
Loss = 4.3396e-02, PNorm = 69.8137, GNorm = 0.3891, lr_0 = 2.0254e-04
Loss = 3.8921e-02, PNorm = 69.8166, GNorm = 0.4395, lr_0 = 2.0240e-04
Loss = 4.5363e-02, PNorm = 69.8191, GNorm = 0.8890, lr_0 = 2.0227e-04
Loss = 4.7112e-02, PNorm = 69.8227, GNorm = 0.5369, lr_0 = 2.0213e-04
Loss = 4.8625e-02, PNorm = 69.8263, GNorm = 0.4155, lr_0 = 2.0199e-04
Loss = 4.6668e-02, PNorm = 69.8300, GNorm = 0.6581, lr_0 = 2.0185e-04
Loss = 5.0209e-02, PNorm = 69.8348, GNorm = 0.5322, lr_0 = 2.0171e-04
Loss = 5.1136e-02, PNorm = 69.8378, GNorm = 0.6350, lr_0 = 2.0157e-04
Loss = 4.9125e-02, PNorm = 69.8411, GNorm = 0.4751, lr_0 = 2.0144e-04
Loss = 5.6579e-02, PNorm = 69.8449, GNorm = 0.7008, lr_0 = 2.0130e-04
Loss = 4.4695e-02, PNorm = 69.8512, GNorm = 0.6856, lr_0 = 2.0116e-04
Loss = 4.2537e-02, PNorm = 69.8554, GNorm = 0.5038, lr_0 = 2.0102e-04
Loss = 4.6154e-02, PNorm = 69.8582, GNorm = 0.7303, lr_0 = 2.0088e-04
Loss = 4.4354e-02, PNorm = 69.8603, GNorm = 0.6648, lr_0 = 2.0075e-04
Loss = 4.2159e-02, PNorm = 69.8621, GNorm = 0.5267, lr_0 = 2.0061e-04
Loss = 5.6030e-02, PNorm = 69.8658, GNorm = 0.4387, lr_0 = 2.0047e-04
Loss = 5.2980e-02, PNorm = 69.8695, GNorm = 0.6408, lr_0 = 2.0033e-04
Loss = 4.9537e-02, PNorm = 69.8728, GNorm = 0.8176, lr_0 = 2.0020e-04
Loss = 3.9833e-02, PNorm = 69.8748, GNorm = 0.4807, lr_0 = 2.0006e-04
Loss = 3.9448e-02, PNorm = 69.8775, GNorm = 0.4967, lr_0 = 1.9992e-04
Loss = 4.5837e-02, PNorm = 69.8814, GNorm = 0.5270, lr_0 = 1.9979e-04
Loss = 4.5461e-02, PNorm = 69.8849, GNorm = 0.9260, lr_0 = 1.9965e-04
Loss = 4.3581e-02, PNorm = 69.8867, GNorm = 0.8682, lr_0 = 1.9951e-04
Loss = 4.1577e-02, PNorm = 69.8894, GNorm = 0.4166, lr_0 = 1.9938e-04
Loss = 5.2583e-02, PNorm = 69.8929, GNorm = 0.5551, lr_0 = 1.9924e-04
Loss = 3.8986e-02, PNorm = 69.8954, GNorm = 0.4020, lr_0 = 1.9910e-04
Loss = 4.9153e-02, PNorm = 69.8967, GNorm = 0.4668, lr_0 = 1.9897e-04
Loss = 4.8325e-02, PNorm = 69.8996, GNorm = 0.6131, lr_0 = 1.9883e-04
Loss = 4.2322e-02, PNorm = 69.9024, GNorm = 0.6714, lr_0 = 1.9869e-04
Loss = 4.2649e-02, PNorm = 69.9049, GNorm = 0.6682, lr_0 = 1.9856e-04
Loss = 5.1666e-02, PNorm = 69.9083, GNorm = 0.7960, lr_0 = 1.9842e-04
Loss = 5.1495e-02, PNorm = 69.9110, GNorm = 0.5662, lr_0 = 1.9829e-04
Loss = 4.1016e-02, PNorm = 69.9135, GNorm = 0.3933, lr_0 = 1.9815e-04
Loss = 3.7840e-02, PNorm = 69.9152, GNorm = 0.5741, lr_0 = 1.9801e-04
Loss = 4.5231e-02, PNorm = 69.9179, GNorm = 0.4647, lr_0 = 1.9788e-04
Loss = 3.9141e-02, PNorm = 69.9217, GNorm = 0.4713, lr_0 = 1.9774e-04
Loss = 4.7228e-02, PNorm = 69.9236, GNorm = 0.9812, lr_0 = 1.9761e-04
Loss = 4.4829e-02, PNorm = 69.9258, GNorm = 0.6010, lr_0 = 1.9747e-04
Loss = 3.8559e-02, PNorm = 69.9283, GNorm = 0.3868, lr_0 = 1.9734e-04
Loss = 4.5031e-02, PNorm = 69.9316, GNorm = 0.5406, lr_0 = 1.9720e-04
Loss = 4.1634e-02, PNorm = 69.9364, GNorm = 0.5046, lr_0 = 1.9707e-04
Loss = 4.4867e-02, PNorm = 69.9397, GNorm = 0.5356, lr_0 = 1.9693e-04
Loss = 4.4298e-02, PNorm = 69.9415, GNorm = 0.5312, lr_0 = 1.9680e-04
Loss = 4.5382e-02, PNorm = 69.9442, GNorm = 0.5561, lr_0 = 1.9666e-04
Loss = 4.7194e-02, PNorm = 69.9482, GNorm = 0.5472, lr_0 = 1.9653e-04
Loss = 5.1225e-02, PNorm = 69.9520, GNorm = 0.4695, lr_0 = 1.9639e-04
Loss = 4.3997e-02, PNorm = 69.9542, GNorm = 0.7744, lr_0 = 1.9626e-04
Loss = 4.6358e-02, PNorm = 69.9553, GNorm = 0.4968, lr_0 = 1.9612e-04
Loss = 4.8556e-02, PNorm = 69.9576, GNorm = 0.6504, lr_0 = 1.9599e-04
Loss = 4.7088e-02, PNorm = 69.9603, GNorm = 0.5065, lr_0 = 1.9585e-04
Loss = 3.9569e-02, PNorm = 69.9638, GNorm = 0.4690, lr_0 = 1.9572e-04
Loss = 4.8895e-02, PNorm = 69.9664, GNorm = 0.8089, lr_0 = 1.9559e-04
Loss = 4.0326e-02, PNorm = 69.9670, GNorm = 0.6003, lr_0 = 1.9545e-04
Loss = 5.0848e-02, PNorm = 69.9688, GNorm = 0.4886, lr_0 = 1.9532e-04
Loss = 4.9029e-02, PNorm = 69.9726, GNorm = 0.7267, lr_0 = 1.9518e-04
Loss = 4.1687e-02, PNorm = 69.9760, GNorm = 0.5660, lr_0 = 1.9505e-04
Loss = 5.0089e-02, PNorm = 69.9791, GNorm = 1.0969, lr_0 = 1.9492e-04
Loss = 4.6979e-02, PNorm = 69.9822, GNorm = 0.4643, lr_0 = 1.9478e-04
Loss = 4.1902e-02, PNorm = 69.9860, GNorm = 0.5311, lr_0 = 1.9465e-04
Loss = 4.6084e-02, PNorm = 69.9909, GNorm = 0.4292, lr_0 = 1.9452e-04
Loss = 4.3106e-02, PNorm = 69.9956, GNorm = 0.4353, lr_0 = 1.9438e-04
Loss = 4.3879e-02, PNorm = 69.9986, GNorm = 0.4511, lr_0 = 1.9425e-04
Loss = 4.8546e-02, PNorm = 70.0011, GNorm = 0.5322, lr_0 = 1.9412e-04
Loss = 4.8102e-02, PNorm = 70.0020, GNorm = 0.3696, lr_0 = 1.9398e-04
Loss = 4.3995e-02, PNorm = 70.0033, GNorm = 0.6859, lr_0 = 1.9385e-04
Loss = 4.0299e-02, PNorm = 70.0059, GNorm = 0.5602, lr_0 = 1.9372e-04
Loss = 4.6442e-02, PNorm = 70.0087, GNorm = 0.5605, lr_0 = 1.9359e-04
Loss = 4.4873e-02, PNorm = 70.0118, GNorm = 0.7642, lr_0 = 1.9345e-04
Loss = 4.3786e-02, PNorm = 70.0154, GNorm = 0.5081, lr_0 = 1.9332e-04
Loss = 5.0509e-02, PNorm = 70.0185, GNorm = 0.7979, lr_0 = 1.9319e-04
Loss = 4.7380e-02, PNorm = 70.0223, GNorm = 0.5625, lr_0 = 1.9306e-04
Validation mae = 0.388300
Epoch 22
Loss = 3.9057e-02, PNorm = 70.0257, GNorm = 0.7025, lr_0 = 1.9292e-04
Loss = 3.3364e-02, PNorm = 70.0299, GNorm = 0.5418, lr_0 = 1.9279e-04
Loss = 4.1665e-02, PNorm = 70.0320, GNorm = 0.3687, lr_0 = 1.9266e-04
Loss = 3.7609e-02, PNorm = 70.0337, GNorm = 0.4828, lr_0 = 1.9253e-04
Loss = 4.0938e-02, PNorm = 70.0359, GNorm = 0.5959, lr_0 = 1.9240e-04
Loss = 4.3735e-02, PNorm = 70.0372, GNorm = 0.4832, lr_0 = 1.9226e-04
Loss = 3.8890e-02, PNorm = 70.0406, GNorm = 0.5290, lr_0 = 1.9213e-04
Loss = 4.0923e-02, PNorm = 70.0446, GNorm = 0.5669, lr_0 = 1.9200e-04
Loss = 3.4749e-02, PNorm = 70.0465, GNorm = 0.5660, lr_0 = 1.9187e-04
Loss = 4.4238e-02, PNorm = 70.0491, GNorm = 0.5435, lr_0 = 1.9174e-04
Loss = 4.0576e-02, PNorm = 70.0528, GNorm = 0.5463, lr_0 = 1.9161e-04
Loss = 3.8629e-02, PNorm = 70.0554, GNorm = 0.5204, lr_0 = 1.9148e-04
Loss = 4.6991e-02, PNorm = 70.0584, GNorm = 0.7577, lr_0 = 1.9134e-04
Loss = 4.1582e-02, PNorm = 70.0613, GNorm = 0.6608, lr_0 = 1.9121e-04
Loss = 4.0046e-02, PNorm = 70.0650, GNorm = 0.5566, lr_0 = 1.9108e-04
Loss = 3.5071e-02, PNorm = 70.0679, GNorm = 0.5459, lr_0 = 1.9095e-04
Loss = 3.6907e-02, PNorm = 70.0697, GNorm = 0.4849, lr_0 = 1.9082e-04
Loss = 3.6959e-02, PNorm = 70.0726, GNorm = 0.5253, lr_0 = 1.9069e-04
Loss = 4.1413e-02, PNorm = 70.0739, GNorm = 0.4527, lr_0 = 1.9056e-04
Loss = 4.1701e-02, PNorm = 70.0768, GNorm = 0.4270, lr_0 = 1.9043e-04
Loss = 3.3859e-02, PNorm = 70.0800, GNorm = 0.3556, lr_0 = 1.9030e-04
Loss = 4.1801e-02, PNorm = 70.0838, GNorm = 0.3830, lr_0 = 1.9017e-04
Loss = 4.0946e-02, PNorm = 70.0872, GNorm = 0.4103, lr_0 = 1.9004e-04
Loss = 4.0715e-02, PNorm = 70.0904, GNorm = 0.4428, lr_0 = 1.8991e-04
Loss = 4.8176e-02, PNorm = 70.0940, GNorm = 0.4296, lr_0 = 1.8978e-04
Loss = 3.5770e-02, PNorm = 70.0970, GNorm = 0.4889, lr_0 = 1.8965e-04
Loss = 3.6506e-02, PNorm = 70.0997, GNorm = 0.4801, lr_0 = 1.8952e-04
Loss = 3.9426e-02, PNorm = 70.1001, GNorm = 0.3641, lr_0 = 1.8939e-04
Loss = 3.8668e-02, PNorm = 70.1042, GNorm = 0.4184, lr_0 = 1.8926e-04
Loss = 3.7856e-02, PNorm = 70.1098, GNorm = 0.3973, lr_0 = 1.8913e-04
Loss = 3.9833e-02, PNorm = 70.1131, GNorm = 0.4552, lr_0 = 1.8900e-04
Loss = 3.7646e-02, PNorm = 70.1166, GNorm = 0.6157, lr_0 = 1.8887e-04
Loss = 4.3189e-02, PNorm = 70.1204, GNorm = 0.6339, lr_0 = 1.8874e-04
Loss = 4.5798e-02, PNorm = 70.1240, GNorm = 0.6941, lr_0 = 1.8861e-04
Loss = 3.9540e-02, PNorm = 70.1272, GNorm = 0.4212, lr_0 = 1.8848e-04
Loss = 4.0588e-02, PNorm = 70.1300, GNorm = 0.5001, lr_0 = 1.8835e-04
Loss = 4.4689e-02, PNorm = 70.1319, GNorm = 0.5910, lr_0 = 1.8822e-04
Loss = 3.6285e-02, PNorm = 70.1353, GNorm = 0.4326, lr_0 = 1.8809e-04
Loss = 4.1598e-02, PNorm = 70.1386, GNorm = 0.4528, lr_0 = 1.8797e-04
Loss = 3.8557e-02, PNorm = 70.1406, GNorm = 0.5863, lr_0 = 1.8784e-04
Loss = 4.1471e-02, PNorm = 70.1430, GNorm = 0.4866, lr_0 = 1.8771e-04
Loss = 3.9162e-02, PNorm = 70.1452, GNorm = 0.4970, lr_0 = 1.8758e-04
Loss = 4.3079e-02, PNorm = 70.1475, GNorm = 0.4674, lr_0 = 1.8745e-04
Loss = 4.0548e-02, PNorm = 70.1508, GNorm = 0.3704, lr_0 = 1.8732e-04
Loss = 4.5055e-02, PNorm = 70.1552, GNorm = 0.4523, lr_0 = 1.8719e-04
Loss = 4.6691e-02, PNorm = 70.1575, GNorm = 0.8416, lr_0 = 1.8707e-04
Loss = 3.8471e-02, PNorm = 70.1615, GNorm = 0.3815, lr_0 = 1.8694e-04
Loss = 4.2616e-02, PNorm = 70.1657, GNorm = 0.4936, lr_0 = 1.8681e-04
Loss = 4.6009e-02, PNorm = 70.1691, GNorm = 0.4987, lr_0 = 1.8668e-04
Loss = 4.0227e-02, PNorm = 70.1698, GNorm = 0.4656, lr_0 = 1.8655e-04
Loss = 4.6752e-02, PNorm = 70.1713, GNorm = 0.5575, lr_0 = 1.8643e-04
Loss = 4.1739e-02, PNorm = 70.1748, GNorm = 0.4271, lr_0 = 1.8630e-04
Loss = 4.7623e-02, PNorm = 70.1786, GNorm = 0.5291, lr_0 = 1.8617e-04
Loss = 4.8931e-02, PNorm = 70.1829, GNorm = 0.9555, lr_0 = 1.8604e-04
Loss = 4.4384e-02, PNorm = 70.1887, GNorm = 0.4762, lr_0 = 1.8592e-04
Loss = 4.3868e-02, PNorm = 70.1939, GNorm = 0.5018, lr_0 = 1.8579e-04
Loss = 3.8402e-02, PNorm = 70.1962, GNorm = 0.5716, lr_0 = 1.8566e-04
Loss = 4.0199e-02, PNorm = 70.1981, GNorm = 0.4925, lr_0 = 1.8553e-04
Loss = 4.2660e-02, PNorm = 70.2015, GNorm = 0.7382, lr_0 = 1.8541e-04
Loss = 4.2546e-02, PNorm = 70.2037, GNorm = 0.6107, lr_0 = 1.8528e-04
Loss = 4.1233e-02, PNorm = 70.2065, GNorm = 0.5674, lr_0 = 1.8515e-04
Loss = 4.0751e-02, PNorm = 70.2118, GNorm = 0.4514, lr_0 = 1.8503e-04
Loss = 4.5403e-02, PNorm = 70.2142, GNorm = 0.5726, lr_0 = 1.8490e-04
Loss = 4.0511e-02, PNorm = 70.2166, GNorm = 0.5227, lr_0 = 1.8477e-04
Loss = 3.9190e-02, PNorm = 70.2197, GNorm = 0.7819, lr_0 = 1.8465e-04
Loss = 4.5089e-02, PNorm = 70.2230, GNorm = 0.7312, lr_0 = 1.8452e-04
Loss = 4.3302e-02, PNorm = 70.2275, GNorm = 0.4795, lr_0 = 1.8439e-04
Loss = 4.1055e-02, PNorm = 70.2293, GNorm = 0.5286, lr_0 = 1.8427e-04
Loss = 4.8667e-02, PNorm = 70.2312, GNorm = 0.5069, lr_0 = 1.8414e-04
Loss = 3.6840e-02, PNorm = 70.2344, GNorm = 0.4690, lr_0 = 1.8401e-04
Loss = 3.7397e-02, PNorm = 70.2360, GNorm = 0.4921, lr_0 = 1.8389e-04
Loss = 4.5218e-02, PNorm = 70.2395, GNorm = 0.5892, lr_0 = 1.8376e-04
Loss = 4.4179e-02, PNorm = 70.2421, GNorm = 0.5766, lr_0 = 1.8364e-04
Loss = 4.1342e-02, PNorm = 70.2443, GNorm = 0.5888, lr_0 = 1.8351e-04
Loss = 4.0501e-02, PNorm = 70.2465, GNorm = 0.4152, lr_0 = 1.8338e-04
Loss = 4.1208e-02, PNorm = 70.2500, GNorm = 0.5140, lr_0 = 1.8326e-04
Loss = 4.3478e-02, PNorm = 70.2545, GNorm = 0.7098, lr_0 = 1.8313e-04
Loss = 3.8495e-02, PNorm = 70.2577, GNorm = 0.4703, lr_0 = 1.8301e-04
Loss = 4.3974e-02, PNorm = 70.2607, GNorm = 0.3725, lr_0 = 1.8288e-04
Loss = 4.3498e-02, PNorm = 70.2641, GNorm = 0.5458, lr_0 = 1.8276e-04
Loss = 3.9293e-02, PNorm = 70.2655, GNorm = 0.5463, lr_0 = 1.8263e-04
Loss = 3.7797e-02, PNorm = 70.2677, GNorm = 0.5983, lr_0 = 1.8251e-04
Loss = 3.9396e-02, PNorm = 70.2692, GNorm = 0.7737, lr_0 = 1.8238e-04
Loss = 3.6435e-02, PNorm = 70.2703, GNorm = 0.5334, lr_0 = 1.8226e-04
Loss = 4.2820e-02, PNorm = 70.2729, GNorm = 0.3429, lr_0 = 1.8213e-04
Loss = 4.3921e-02, PNorm = 70.2750, GNorm = 0.4421, lr_0 = 1.8201e-04
Loss = 4.0629e-02, PNorm = 70.2777, GNorm = 0.4706, lr_0 = 1.8188e-04
Loss = 4.6086e-02, PNorm = 70.2789, GNorm = 0.5862, lr_0 = 1.8176e-04
Loss = 4.2361e-02, PNorm = 70.2812, GNorm = 0.5877, lr_0 = 1.8163e-04
Loss = 3.8048e-02, PNorm = 70.2848, GNorm = 0.4311, lr_0 = 1.8151e-04
Loss = 4.5456e-02, PNorm = 70.2878, GNorm = 0.6546, lr_0 = 1.8138e-04
Loss = 4.4206e-02, PNorm = 70.2910, GNorm = 0.4563, lr_0 = 1.8126e-04
Loss = 4.9038e-02, PNorm = 70.2935, GNorm = 0.4574, lr_0 = 1.8114e-04
Loss = 4.3880e-02, PNorm = 70.2970, GNorm = 0.5598, lr_0 = 1.8101e-04
Loss = 4.1515e-02, PNorm = 70.2992, GNorm = 0.6001, lr_0 = 1.8089e-04
Loss = 4.0637e-02, PNorm = 70.3022, GNorm = 0.8293, lr_0 = 1.8076e-04
Loss = 5.0204e-02, PNorm = 70.3032, GNorm = 0.5817, lr_0 = 1.8064e-04
Loss = 4.4849e-02, PNorm = 70.3049, GNorm = 0.5893, lr_0 = 1.8052e-04
Loss = 4.5547e-02, PNorm = 70.3064, GNorm = 0.7505, lr_0 = 1.8039e-04
Loss = 3.5735e-02, PNorm = 70.3095, GNorm = 0.7544, lr_0 = 1.8027e-04
Loss = 3.8881e-02, PNorm = 70.3131, GNorm = 0.4766, lr_0 = 1.8015e-04
Loss = 4.0597e-02, PNorm = 70.3140, GNorm = 0.5535, lr_0 = 1.8002e-04
Loss = 4.8374e-02, PNorm = 70.3158, GNorm = 0.4028, lr_0 = 1.7990e-04
Loss = 4.5709e-02, PNorm = 70.3194, GNorm = 0.4425, lr_0 = 1.7978e-04
Loss = 5.6573e-02, PNorm = 70.3219, GNorm = 0.5177, lr_0 = 1.7965e-04
Loss = 3.9850e-02, PNorm = 70.3261, GNorm = 0.5817, lr_0 = 1.7953e-04
Loss = 4.3316e-02, PNorm = 70.3299, GNorm = 0.4327, lr_0 = 1.7941e-04
Loss = 5.0073e-02, PNorm = 70.3313, GNorm = 0.8330, lr_0 = 1.7928e-04
Loss = 4.2174e-02, PNorm = 70.3330, GNorm = 0.6041, lr_0 = 1.7916e-04
Loss = 4.7949e-02, PNorm = 70.3352, GNorm = 0.6614, lr_0 = 1.7904e-04
Loss = 4.6856e-02, PNorm = 70.3380, GNorm = 0.6057, lr_0 = 1.7892e-04
Loss = 3.8653e-02, PNorm = 70.3418, GNorm = 0.6030, lr_0 = 1.7879e-04
Loss = 3.7947e-02, PNorm = 70.3452, GNorm = 0.3877, lr_0 = 1.7867e-04
Loss = 4.3232e-02, PNorm = 70.3474, GNorm = 0.4509, lr_0 = 1.7855e-04
Loss = 5.1553e-02, PNorm = 70.3487, GNorm = 0.6282, lr_0 = 1.7843e-04
Loss = 3.8667e-02, PNorm = 70.3504, GNorm = 0.7125, lr_0 = 1.7830e-04
Loss = 4.4420e-02, PNorm = 70.3532, GNorm = 0.4789, lr_0 = 1.7818e-04
Loss = 4.1099e-02, PNorm = 70.3571, GNorm = 0.4374, lr_0 = 1.7806e-04
Loss = 4.7975e-02, PNorm = 70.3588, GNorm = 0.7641, lr_0 = 1.7794e-04
Loss = 4.7392e-02, PNorm = 70.3621, GNorm = 0.5362, lr_0 = 1.7782e-04
Validation mae = 0.391877
Epoch 23
Loss = 3.6305e-02, PNorm = 70.3684, GNorm = 0.3482, lr_0 = 1.7769e-04
Loss = 3.5000e-02, PNorm = 70.3715, GNorm = 0.5873, lr_0 = 1.7757e-04
Loss = 3.4027e-02, PNorm = 70.3741, GNorm = 0.4886, lr_0 = 1.7745e-04
Loss = 3.7833e-02, PNorm = 70.3778, GNorm = 0.4160, lr_0 = 1.7733e-04
Loss = 4.1560e-02, PNorm = 70.3815, GNorm = 0.7762, lr_0 = 1.7721e-04
Loss = 3.7713e-02, PNorm = 70.3843, GNorm = 0.7166, lr_0 = 1.7709e-04
Loss = 3.2485e-02, PNorm = 70.3864, GNorm = 0.6914, lr_0 = 1.7696e-04
Loss = 3.6423e-02, PNorm = 70.3893, GNorm = 0.6231, lr_0 = 1.7684e-04
Loss = 3.3073e-02, PNorm = 70.3913, GNorm = 0.4599, lr_0 = 1.7672e-04
Loss = 3.8483e-02, PNorm = 70.3950, GNorm = 0.4950, lr_0 = 1.7660e-04
Loss = 3.7007e-02, PNorm = 70.3990, GNorm = 0.4573, lr_0 = 1.7648e-04
Loss = 4.0989e-02, PNorm = 70.4012, GNorm = 0.5929, lr_0 = 1.7636e-04
Loss = 3.5541e-02, PNorm = 70.4031, GNorm = 0.3939, lr_0 = 1.7624e-04
Loss = 3.3281e-02, PNorm = 70.4061, GNorm = 0.4430, lr_0 = 1.7612e-04
Loss = 4.1324e-02, PNorm = 70.4089, GNorm = 0.5547, lr_0 = 1.7600e-04
Loss = 4.3928e-02, PNorm = 70.4120, GNorm = 0.4358, lr_0 = 1.7588e-04
Loss = 4.0810e-02, PNorm = 70.4151, GNorm = 0.5465, lr_0 = 1.7576e-04
Loss = 4.2691e-02, PNorm = 70.4167, GNorm = 0.5271, lr_0 = 1.7564e-04
Loss = 3.4353e-02, PNorm = 70.4194, GNorm = 0.4567, lr_0 = 1.7552e-04
Loss = 3.1819e-02, PNorm = 70.4218, GNorm = 0.4706, lr_0 = 1.7540e-04
Loss = 3.7913e-02, PNorm = 70.4248, GNorm = 0.7185, lr_0 = 1.7528e-04
Loss = 3.8468e-02, PNorm = 70.4271, GNorm = 0.4850, lr_0 = 1.7516e-04
Loss = 3.8245e-02, PNorm = 70.4310, GNorm = 0.4403, lr_0 = 1.7504e-04
Loss = 3.9764e-02, PNorm = 70.4349, GNorm = 0.4719, lr_0 = 1.7492e-04
Loss = 3.9681e-02, PNorm = 70.4372, GNorm = 0.6220, lr_0 = 1.7480e-04
Loss = 3.5602e-02, PNorm = 70.4396, GNorm = 0.5142, lr_0 = 1.7468e-04
Loss = 3.8142e-02, PNorm = 70.4424, GNorm = 0.6243, lr_0 = 1.7456e-04
Loss = 3.6394e-02, PNorm = 70.4445, GNorm = 0.4405, lr_0 = 1.7444e-04
Loss = 3.8846e-02, PNorm = 70.4474, GNorm = 0.5973, lr_0 = 1.7432e-04
Loss = 3.9082e-02, PNorm = 70.4496, GNorm = 0.5204, lr_0 = 1.7420e-04
Loss = 4.0871e-02, PNorm = 70.4526, GNorm = 0.5596, lr_0 = 1.7408e-04
Loss = 3.6648e-02, PNorm = 70.4556, GNorm = 0.6652, lr_0 = 1.7396e-04
Loss = 3.9949e-02, PNorm = 70.4578, GNorm = 0.8744, lr_0 = 1.7384e-04
Loss = 4.0334e-02, PNorm = 70.4602, GNorm = 0.5123, lr_0 = 1.7372e-04
Loss = 3.7969e-02, PNorm = 70.4621, GNorm = 0.6709, lr_0 = 1.7360e-04
Loss = 3.6549e-02, PNorm = 70.4639, GNorm = 0.5787, lr_0 = 1.7348e-04
Loss = 3.5432e-02, PNorm = 70.4659, GNorm = 0.4315, lr_0 = 1.7336e-04
Loss = 3.9319e-02, PNorm = 70.4691, GNorm = 0.4518, lr_0 = 1.7325e-04
Loss = 3.9325e-02, PNorm = 70.4720, GNorm = 0.3308, lr_0 = 1.7313e-04
Loss = 3.3211e-02, PNorm = 70.4738, GNorm = 0.5219, lr_0 = 1.7301e-04
Loss = 4.0710e-02, PNorm = 70.4760, GNorm = 0.6361, lr_0 = 1.7289e-04
Loss = 3.5391e-02, PNorm = 70.4797, GNorm = 0.3634, lr_0 = 1.7277e-04
Loss = 4.0476e-02, PNorm = 70.4817, GNorm = 0.5548, lr_0 = 1.7265e-04
Loss = 4.0608e-02, PNorm = 70.4834, GNorm = 0.5534, lr_0 = 1.7253e-04
Loss = 3.9954e-02, PNorm = 70.4864, GNorm = 0.4406, lr_0 = 1.7242e-04
Loss = 3.7016e-02, PNorm = 70.4870, GNorm = 0.5060, lr_0 = 1.7230e-04
Loss = 3.9275e-02, PNorm = 70.4887, GNorm = 0.4654, lr_0 = 1.7218e-04
Loss = 4.2582e-02, PNorm = 70.4905, GNorm = 0.5701, lr_0 = 1.7206e-04
Loss = 4.2082e-02, PNorm = 70.4926, GNorm = 0.4457, lr_0 = 1.7194e-04
Loss = 4.3613e-02, PNorm = 70.4964, GNorm = 0.8140, lr_0 = 1.7183e-04
Loss = 4.1596e-02, PNorm = 70.5007, GNorm = 0.5990, lr_0 = 1.7171e-04
Loss = 4.5056e-02, PNorm = 70.5043, GNorm = 0.7150, lr_0 = 1.7159e-04
Loss = 4.4086e-02, PNorm = 70.5062, GNorm = 0.4295, lr_0 = 1.7147e-04
Loss = 4.3449e-02, PNorm = 70.5094, GNorm = 0.3754, lr_0 = 1.7136e-04
Loss = 3.8741e-02, PNorm = 70.5125, GNorm = 0.7440, lr_0 = 1.7124e-04
Loss = 3.7115e-02, PNorm = 70.5155, GNorm = 0.4486, lr_0 = 1.7112e-04
Loss = 3.8472e-02, PNorm = 70.5187, GNorm = 0.6705, lr_0 = 1.7100e-04
Loss = 4.5679e-02, PNorm = 70.5216, GNorm = 0.6701, lr_0 = 1.7089e-04
Loss = 4.3518e-02, PNorm = 70.5224, GNorm = 0.8578, lr_0 = 1.7077e-04
Loss = 4.2365e-02, PNorm = 70.5242, GNorm = 0.7933, lr_0 = 1.7065e-04
Loss = 3.8843e-02, PNorm = 70.5272, GNorm = 0.6684, lr_0 = 1.7054e-04
Loss = 4.1477e-02, PNorm = 70.5301, GNorm = 0.4665, lr_0 = 1.7042e-04
Loss = 3.6131e-02, PNorm = 70.5324, GNorm = 0.4615, lr_0 = 1.7030e-04
Loss = 4.4085e-02, PNorm = 70.5338, GNorm = 0.5181, lr_0 = 1.7019e-04
Loss = 3.6607e-02, PNorm = 70.5356, GNorm = 0.3601, lr_0 = 1.7007e-04
Loss = 4.2618e-02, PNorm = 70.5374, GNorm = 0.5643, lr_0 = 1.6995e-04
Loss = 4.1149e-02, PNorm = 70.5404, GNorm = 0.4258, lr_0 = 1.6984e-04
Loss = 3.8112e-02, PNorm = 70.5432, GNorm = 0.6042, lr_0 = 1.6972e-04
Loss = 3.3368e-02, PNorm = 70.5463, GNorm = 0.4866, lr_0 = 1.6960e-04
Loss = 3.9986e-02, PNorm = 70.5499, GNorm = 0.5369, lr_0 = 1.6949e-04
Loss = 4.0797e-02, PNorm = 70.5522, GNorm = 0.5545, lr_0 = 1.6937e-04
Loss = 4.2944e-02, PNorm = 70.5561, GNorm = 0.6580, lr_0 = 1.6926e-04
Loss = 4.6418e-02, PNorm = 70.5588, GNorm = 0.5951, lr_0 = 1.6914e-04
Loss = 3.6853e-02, PNorm = 70.5618, GNorm = 0.6226, lr_0 = 1.6902e-04
Loss = 4.2488e-02, PNorm = 70.5660, GNorm = 0.4862, lr_0 = 1.6891e-04
Loss = 3.9915e-02, PNorm = 70.5694, GNorm = 0.4192, lr_0 = 1.6879e-04
Loss = 4.3557e-02, PNorm = 70.5730, GNorm = 0.4059, lr_0 = 1.6868e-04
Loss = 4.0809e-02, PNorm = 70.5752, GNorm = 0.3846, lr_0 = 1.6856e-04
Loss = 3.8780e-02, PNorm = 70.5765, GNorm = 0.5071, lr_0 = 1.6845e-04
Loss = 3.8893e-02, PNorm = 70.5785, GNorm = 0.3865, lr_0 = 1.6833e-04
Loss = 4.2606e-02, PNorm = 70.5809, GNorm = 0.6570, lr_0 = 1.6821e-04
Loss = 3.9535e-02, PNorm = 70.5857, GNorm = 0.4903, lr_0 = 1.6810e-04
Loss = 3.7701e-02, PNorm = 70.5876, GNorm = 0.5931, lr_0 = 1.6798e-04
Loss = 3.4211e-02, PNorm = 70.5895, GNorm = 0.5367, lr_0 = 1.6787e-04
Loss = 4.9418e-02, PNorm = 70.5916, GNorm = 0.5247, lr_0 = 1.6775e-04
Loss = 4.0036e-02, PNorm = 70.5930, GNorm = 0.6158, lr_0 = 1.6764e-04
Loss = 4.1073e-02, PNorm = 70.5955, GNorm = 0.4692, lr_0 = 1.6752e-04
Loss = 4.0069e-02, PNorm = 70.5970, GNorm = 0.4668, lr_0 = 1.6741e-04
Loss = 3.7372e-02, PNorm = 70.5982, GNorm = 0.4209, lr_0 = 1.6729e-04
Loss = 3.8603e-02, PNorm = 70.6015, GNorm = 0.6183, lr_0 = 1.6718e-04
Loss = 3.7674e-02, PNorm = 70.6054, GNorm = 0.5015, lr_0 = 1.6707e-04
Loss = 4.3731e-02, PNorm = 70.6072, GNorm = 0.4426, lr_0 = 1.6695e-04
Loss = 4.0221e-02, PNorm = 70.6107, GNorm = 0.4219, lr_0 = 1.6684e-04
Loss = 3.4906e-02, PNorm = 70.6131, GNorm = 0.5570, lr_0 = 1.6672e-04
Loss = 3.8125e-02, PNorm = 70.6154, GNorm = 0.5984, lr_0 = 1.6661e-04
Loss = 3.5402e-02, PNorm = 70.6184, GNorm = 0.3676, lr_0 = 1.6649e-04
Loss = 3.5005e-02, PNorm = 70.6207, GNorm = 0.3697, lr_0 = 1.6638e-04
Loss = 4.8329e-02, PNorm = 70.6233, GNorm = 0.6383, lr_0 = 1.6627e-04
Loss = 4.5754e-02, PNorm = 70.6270, GNorm = 0.5766, lr_0 = 1.6615e-04
Loss = 3.8656e-02, PNorm = 70.6323, GNorm = 0.6454, lr_0 = 1.6604e-04
Loss = 3.9627e-02, PNorm = 70.6351, GNorm = 0.5538, lr_0 = 1.6592e-04
Loss = 3.8285e-02, PNorm = 70.6361, GNorm = 0.4604, lr_0 = 1.6581e-04
Loss = 4.3177e-02, PNorm = 70.6366, GNorm = 0.5003, lr_0 = 1.6570e-04
Loss = 3.8270e-02, PNorm = 70.6382, GNorm = 0.4687, lr_0 = 1.6558e-04
Loss = 3.7960e-02, PNorm = 70.6408, GNorm = 0.5695, lr_0 = 1.6547e-04
Loss = 4.0887e-02, PNorm = 70.6441, GNorm = 0.6108, lr_0 = 1.6536e-04
Loss = 3.9065e-02, PNorm = 70.6468, GNorm = 0.7500, lr_0 = 1.6524e-04
Loss = 3.9423e-02, PNorm = 70.6489, GNorm = 0.5423, lr_0 = 1.6513e-04
Loss = 4.3095e-02, PNorm = 70.6517, GNorm = 0.6751, lr_0 = 1.6502e-04
Loss = 4.0076e-02, PNorm = 70.6550, GNorm = 0.4753, lr_0 = 1.6490e-04
Loss = 4.0104e-02, PNorm = 70.6575, GNorm = 0.6032, lr_0 = 1.6479e-04
Loss = 4.2336e-02, PNorm = 70.6585, GNorm = 0.5505, lr_0 = 1.6468e-04
Loss = 4.5414e-02, PNorm = 70.6595, GNorm = 0.6653, lr_0 = 1.6457e-04
Loss = 4.1908e-02, PNorm = 70.6600, GNorm = 0.4440, lr_0 = 1.6445e-04
Loss = 3.8442e-02, PNorm = 70.6610, GNorm = 0.4422, lr_0 = 1.6434e-04
Loss = 3.5747e-02, PNorm = 70.6630, GNorm = 0.5515, lr_0 = 1.6423e-04
Loss = 3.8391e-02, PNorm = 70.6654, GNorm = 0.4936, lr_0 = 1.6412e-04
Loss = 3.8289e-02, PNorm = 70.6672, GNorm = 0.5502, lr_0 = 1.6400e-04
Loss = 4.1036e-02, PNorm = 70.6694, GNorm = 0.4647, lr_0 = 1.6389e-04
Loss = 4.4162e-02, PNorm = 70.6717, GNorm = 0.4514, lr_0 = 1.6378e-04
Validation mae = 0.392312
Epoch 24
Loss = 3.2493e-02, PNorm = 70.6739, GNorm = 0.6885, lr_0 = 1.6367e-04
Loss = 3.4113e-02, PNorm = 70.6766, GNorm = 0.4587, lr_0 = 1.6355e-04
Loss = 3.5056e-02, PNorm = 70.6801, GNorm = 0.6781, lr_0 = 1.6344e-04
Loss = 3.2742e-02, PNorm = 70.6838, GNorm = 0.3685, lr_0 = 1.6333e-04
Loss = 3.7406e-02, PNorm = 70.6868, GNorm = 0.4999, lr_0 = 1.6322e-04
Loss = 3.6137e-02, PNorm = 70.6878, GNorm = 0.5733, lr_0 = 1.6311e-04
Loss = 3.9595e-02, PNorm = 70.6896, GNorm = 0.6073, lr_0 = 1.6299e-04
Loss = 3.2518e-02, PNorm = 70.6919, GNorm = 0.4318, lr_0 = 1.6288e-04
Loss = 3.4337e-02, PNorm = 70.6951, GNorm = 0.5367, lr_0 = 1.6277e-04
Loss = 3.7683e-02, PNorm = 70.6976, GNorm = 0.4958, lr_0 = 1.6266e-04
Loss = 3.7094e-02, PNorm = 70.7002, GNorm = 0.4249, lr_0 = 1.6255e-04
Loss = 3.6225e-02, PNorm = 70.7026, GNorm = 0.5593, lr_0 = 1.6244e-04
Loss = 3.3518e-02, PNorm = 70.7064, GNorm = 0.5334, lr_0 = 1.6233e-04
Loss = 3.8327e-02, PNorm = 70.7088, GNorm = 0.5376, lr_0 = 1.6221e-04
Loss = 3.6172e-02, PNorm = 70.7119, GNorm = 0.5035, lr_0 = 1.6210e-04
Loss = 4.2207e-02, PNorm = 70.7147, GNorm = 0.8290, lr_0 = 1.6199e-04
Loss = 3.2098e-02, PNorm = 70.7174, GNorm = 0.4336, lr_0 = 1.6188e-04
Loss = 3.5715e-02, PNorm = 70.7185, GNorm = 0.4461, lr_0 = 1.6177e-04
Loss = 3.3199e-02, PNorm = 70.7192, GNorm = 0.5139, lr_0 = 1.6166e-04
Loss = 3.7424e-02, PNorm = 70.7211, GNorm = 0.5346, lr_0 = 1.6155e-04
Loss = 3.7870e-02, PNorm = 70.7239, GNorm = 0.5463, lr_0 = 1.6144e-04
Loss = 3.3753e-02, PNorm = 70.7263, GNorm = 0.8536, lr_0 = 1.6133e-04
Loss = 3.0417e-02, PNorm = 70.7293, GNorm = 0.5332, lr_0 = 1.6122e-04
Loss = 3.0485e-02, PNorm = 70.7324, GNorm = 0.5314, lr_0 = 1.6111e-04
Loss = 3.3691e-02, PNorm = 70.7366, GNorm = 0.4190, lr_0 = 1.6100e-04
Loss = 4.0737e-02, PNorm = 70.7393, GNorm = 0.4964, lr_0 = 1.6089e-04
Loss = 3.8137e-02, PNorm = 70.7403, GNorm = 0.4018, lr_0 = 1.6078e-04
Loss = 2.8458e-02, PNorm = 70.7425, GNorm = 0.3671, lr_0 = 1.6067e-04
Loss = 3.9345e-02, PNorm = 70.7451, GNorm = 0.8444, lr_0 = 1.6056e-04
Loss = 3.8040e-02, PNorm = 70.7495, GNorm = 0.5844, lr_0 = 1.6045e-04
Loss = 3.7450e-02, PNorm = 70.7537, GNorm = 0.3526, lr_0 = 1.6034e-04
Loss = 3.9355e-02, PNorm = 70.7563, GNorm = 0.4154, lr_0 = 1.6023e-04
Loss = 3.9789e-02, PNorm = 70.7589, GNorm = 0.4837, lr_0 = 1.6012e-04
Loss = 3.2219e-02, PNorm = 70.7619, GNorm = 0.4197, lr_0 = 1.6001e-04
Loss = 3.8927e-02, PNorm = 70.7644, GNorm = 0.4562, lr_0 = 1.5990e-04
Loss = 3.8818e-02, PNorm = 70.7664, GNorm = 0.4274, lr_0 = 1.5979e-04
Loss = 4.1352e-02, PNorm = 70.7698, GNorm = 0.5710, lr_0 = 1.5968e-04
Loss = 3.8847e-02, PNorm = 70.7725, GNorm = 0.4868, lr_0 = 1.5957e-04
Loss = 3.3646e-02, PNorm = 70.7742, GNorm = 0.5101, lr_0 = 1.5946e-04
Loss = 3.7404e-02, PNorm = 70.7761, GNorm = 0.6370, lr_0 = 1.5935e-04
Loss = 3.4379e-02, PNorm = 70.7773, GNorm = 0.5155, lr_0 = 1.5924e-04
Loss = 2.9113e-02, PNorm = 70.7789, GNorm = 0.4948, lr_0 = 1.5913e-04
Loss = 3.4653e-02, PNorm = 70.7807, GNorm = 0.4657, lr_0 = 1.5902e-04
Loss = 4.0812e-02, PNorm = 70.7821, GNorm = 0.6331, lr_0 = 1.5891e-04
Loss = 4.2086e-02, PNorm = 70.7836, GNorm = 0.5313, lr_0 = 1.5880e-04
Loss = 3.4971e-02, PNorm = 70.7857, GNorm = 0.4343, lr_0 = 1.5870e-04
Loss = 3.9124e-02, PNorm = 70.7882, GNorm = 0.4237, lr_0 = 1.5859e-04
Loss = 3.8453e-02, PNorm = 70.7907, GNorm = 0.7719, lr_0 = 1.5848e-04
Loss = 3.6881e-02, PNorm = 70.7934, GNorm = 0.4468, lr_0 = 1.5837e-04
Loss = 3.9840e-02, PNorm = 70.7974, GNorm = 0.6733, lr_0 = 1.5826e-04
Loss = 3.7173e-02, PNorm = 70.8007, GNorm = 0.4047, lr_0 = 1.5815e-04
Loss = 3.3307e-02, PNorm = 70.8028, GNorm = 0.4259, lr_0 = 1.5804e-04
Loss = 4.5528e-02, PNorm = 70.8041, GNorm = 0.5421, lr_0 = 1.5794e-04
Loss = 4.2183e-02, PNorm = 70.8051, GNorm = 0.4548, lr_0 = 1.5783e-04
Loss = 4.3974e-02, PNorm = 70.8065, GNorm = 0.4464, lr_0 = 1.5772e-04
Loss = 3.8474e-02, PNorm = 70.8098, GNorm = 0.5041, lr_0 = 1.5761e-04
Loss = 4.1416e-02, PNorm = 70.8139, GNorm = 0.6096, lr_0 = 1.5750e-04
Loss = 3.6884e-02, PNorm = 70.8159, GNorm = 0.6997, lr_0 = 1.5740e-04
Loss = 5.1586e-02, PNorm = 70.8204, GNorm = 0.5569, lr_0 = 1.5729e-04
Loss = 3.8348e-02, PNorm = 70.8220, GNorm = 0.5917, lr_0 = 1.5718e-04
Loss = 3.9243e-02, PNorm = 70.8224, GNorm = 0.4058, lr_0 = 1.5707e-04
Loss = 3.9573e-02, PNorm = 70.8256, GNorm = 0.3662, lr_0 = 1.5697e-04
Loss = 3.1759e-02, PNorm = 70.8285, GNorm = 0.3711, lr_0 = 1.5686e-04
Loss = 3.7512e-02, PNorm = 70.8305, GNorm = 0.5579, lr_0 = 1.5675e-04
Loss = 3.6024e-02, PNorm = 70.8315, GNorm = 0.4119, lr_0 = 1.5664e-04
Loss = 3.6080e-02, PNorm = 70.8335, GNorm = 0.6553, lr_0 = 1.5654e-04
Loss = 3.5872e-02, PNorm = 70.8368, GNorm = 0.5619, lr_0 = 1.5643e-04
Loss = 3.8320e-02, PNorm = 70.8408, GNorm = 0.3983, lr_0 = 1.5632e-04
Loss = 3.9206e-02, PNorm = 70.8432, GNorm = 0.6095, lr_0 = 1.5621e-04
Loss = 4.1299e-02, PNorm = 70.8447, GNorm = 0.4543, lr_0 = 1.5611e-04
Loss = 3.5187e-02, PNorm = 70.8464, GNorm = 0.5153, lr_0 = 1.5600e-04
Loss = 3.5248e-02, PNorm = 70.8494, GNorm = 0.5860, lr_0 = 1.5589e-04
Loss = 3.5434e-02, PNorm = 70.8510, GNorm = 0.5727, lr_0 = 1.5579e-04
Loss = 3.7261e-02, PNorm = 70.8524, GNorm = 0.4704, lr_0 = 1.5568e-04
Loss = 3.5378e-02, PNorm = 70.8556, GNorm = 0.4955, lr_0 = 1.5557e-04
Loss = 3.6686e-02, PNorm = 70.8567, GNorm = 0.6851, lr_0 = 1.5547e-04
Loss = 3.6022e-02, PNorm = 70.8574, GNorm = 0.4040, lr_0 = 1.5536e-04
Loss = 3.3721e-02, PNorm = 70.8598, GNorm = 0.4814, lr_0 = 1.5525e-04
Loss = 3.5189e-02, PNorm = 70.8623, GNorm = 0.4720, lr_0 = 1.5515e-04
Loss = 4.0467e-02, PNorm = 70.8626, GNorm = 0.6063, lr_0 = 1.5504e-04
Loss = 3.7742e-02, PNorm = 70.8649, GNorm = 0.6112, lr_0 = 1.5493e-04
Loss = 3.5503e-02, PNorm = 70.8680, GNorm = 0.5688, lr_0 = 1.5483e-04
Loss = 3.7507e-02, PNorm = 70.8705, GNorm = 0.4026, lr_0 = 1.5472e-04
Loss = 4.0183e-02, PNorm = 70.8731, GNorm = 0.3774, lr_0 = 1.5462e-04
Loss = 3.2768e-02, PNorm = 70.8755, GNorm = 0.3992, lr_0 = 1.5451e-04
Loss = 3.7637e-02, PNorm = 70.8780, GNorm = 0.6603, lr_0 = 1.5440e-04
Loss = 3.5674e-02, PNorm = 70.8799, GNorm = 0.5755, lr_0 = 1.5430e-04
Loss = 4.0595e-02, PNorm = 70.8821, GNorm = 0.4834, lr_0 = 1.5419e-04
Loss = 4.2214e-02, PNorm = 70.8844, GNorm = 0.6827, lr_0 = 1.5409e-04
Loss = 4.1794e-02, PNorm = 70.8865, GNorm = 0.4611, lr_0 = 1.5398e-04
Loss = 3.3151e-02, PNorm = 70.8888, GNorm = 0.4609, lr_0 = 1.5388e-04
Loss = 4.0441e-02, PNorm = 70.8904, GNorm = 0.5633, lr_0 = 1.5377e-04
Loss = 3.5819e-02, PNorm = 70.8928, GNorm = 0.5032, lr_0 = 1.5367e-04
Loss = 4.1714e-02, PNorm = 70.8944, GNorm = 0.7288, lr_0 = 1.5356e-04
Loss = 3.9052e-02, PNorm = 70.8951, GNorm = 0.4925, lr_0 = 1.5346e-04
Loss = 3.8696e-02, PNorm = 70.8967, GNorm = 0.6167, lr_0 = 1.5335e-04
Loss = 4.6689e-02, PNorm = 70.8996, GNorm = 0.5847, lr_0 = 1.5325e-04
Loss = 4.2664e-02, PNorm = 70.9023, GNorm = 0.5405, lr_0 = 1.5314e-04
Loss = 4.4243e-02, PNorm = 70.9043, GNorm = 0.5265, lr_0 = 1.5304e-04
Loss = 3.5306e-02, PNorm = 70.9043, GNorm = 0.6311, lr_0 = 1.5293e-04
Loss = 3.5569e-02, PNorm = 70.9052, GNorm = 0.5867, lr_0 = 1.5283e-04
Loss = 4.3818e-02, PNorm = 70.9077, GNorm = 0.5606, lr_0 = 1.5272e-04
Loss = 4.5591e-02, PNorm = 70.9116, GNorm = 0.8144, lr_0 = 1.5262e-04
Loss = 3.8171e-02, PNorm = 70.9145, GNorm = 0.4813, lr_0 = 1.5251e-04
Loss = 4.3005e-02, PNorm = 70.9162, GNorm = 0.6308, lr_0 = 1.5241e-04
Loss = 4.4663e-02, PNorm = 70.9182, GNorm = 0.4958, lr_0 = 1.5230e-04
Loss = 3.6486e-02, PNorm = 70.9211, GNorm = 0.4119, lr_0 = 1.5220e-04
Loss = 3.8947e-02, PNorm = 70.9239, GNorm = 0.5247, lr_0 = 1.5209e-04
Loss = 4.0259e-02, PNorm = 70.9262, GNorm = 0.4904, lr_0 = 1.5199e-04
Loss = 3.1714e-02, PNorm = 70.9286, GNorm = 0.3837, lr_0 = 1.5189e-04
Loss = 3.7902e-02, PNorm = 70.9311, GNorm = 0.5339, lr_0 = 1.5178e-04
Loss = 3.6287e-02, PNorm = 70.9328, GNorm = 0.5684, lr_0 = 1.5168e-04
Loss = 3.9746e-02, PNorm = 70.9351, GNorm = 0.5698, lr_0 = 1.5157e-04
Loss = 3.9508e-02, PNorm = 70.9361, GNorm = 0.6077, lr_0 = 1.5147e-04
Loss = 3.4678e-02, PNorm = 70.9350, GNorm = 0.4199, lr_0 = 1.5137e-04
Loss = 3.8024e-02, PNorm = 70.9361, GNorm = 0.5820, lr_0 = 1.5126e-04
Loss = 3.7660e-02, PNorm = 70.9381, GNorm = 0.4540, lr_0 = 1.5116e-04
Loss = 3.8347e-02, PNorm = 70.9391, GNorm = 0.4886, lr_0 = 1.5106e-04
Loss = 4.4080e-02, PNorm = 70.9394, GNorm = 0.6784, lr_0 = 1.5095e-04
Loss = 3.8390e-02, PNorm = 70.9427, GNorm = 0.5469, lr_0 = 1.5085e-04
Validation mae = 0.389127
Epoch 25
Loss = 3.4161e-02, PNorm = 70.9455, GNorm = 0.4010, lr_0 = 1.5075e-04
Loss = 3.2350e-02, PNorm = 70.9490, GNorm = 0.4999, lr_0 = 1.5064e-04
Loss = 3.0051e-02, PNorm = 70.9520, GNorm = 0.4208, lr_0 = 1.5054e-04
Loss = 3.6571e-02, PNorm = 70.9540, GNorm = 0.4620, lr_0 = 1.5044e-04
Loss = 3.4957e-02, PNorm = 70.9559, GNorm = 0.3202, lr_0 = 1.5033e-04
Loss = 3.1078e-02, PNorm = 70.9586, GNorm = 0.4807, lr_0 = 1.5023e-04
Loss = 3.4075e-02, PNorm = 70.9611, GNorm = 0.4505, lr_0 = 1.5013e-04
Loss = 3.2299e-02, PNorm = 70.9640, GNorm = 0.6344, lr_0 = 1.5002e-04
Loss = 3.3614e-02, PNorm = 70.9653, GNorm = 0.7142, lr_0 = 1.4992e-04
Loss = 2.8780e-02, PNorm = 70.9664, GNorm = 0.4199, lr_0 = 1.4982e-04
Loss = 3.5050e-02, PNorm = 70.9674, GNorm = 0.3895, lr_0 = 1.4972e-04
Loss = 3.3082e-02, PNorm = 70.9697, GNorm = 0.3984, lr_0 = 1.4961e-04
Loss = 2.9569e-02, PNorm = 70.9721, GNorm = 0.3536, lr_0 = 1.4951e-04
Loss = 2.9745e-02, PNorm = 70.9741, GNorm = 0.4396, lr_0 = 1.4941e-04
Loss = 3.3312e-02, PNorm = 70.9755, GNorm = 0.4359, lr_0 = 1.4931e-04
Loss = 3.2450e-02, PNorm = 70.9772, GNorm = 0.5627, lr_0 = 1.4920e-04
Loss = 3.2428e-02, PNorm = 70.9793, GNorm = 0.5258, lr_0 = 1.4910e-04
Loss = 2.9107e-02, PNorm = 70.9811, GNorm = 0.3086, lr_0 = 1.4900e-04
Loss = 3.3296e-02, PNorm = 70.9829, GNorm = 0.4655, lr_0 = 1.4890e-04
Loss = 3.6227e-02, PNorm = 70.9858, GNorm = 0.5121, lr_0 = 1.4880e-04
Loss = 3.2653e-02, PNorm = 70.9891, GNorm = 0.3827, lr_0 = 1.4869e-04
Loss = 3.2106e-02, PNorm = 70.9911, GNorm = 0.3211, lr_0 = 1.4859e-04
Loss = 3.4949e-02, PNorm = 70.9930, GNorm = 0.5773, lr_0 = 1.4849e-04
Loss = 3.6489e-02, PNorm = 70.9953, GNorm = 0.5097, lr_0 = 1.4839e-04
Loss = 3.1206e-02, PNorm = 70.9967, GNorm = 0.5110, lr_0 = 1.4829e-04
Loss = 3.2147e-02, PNorm = 70.9985, GNorm = 0.3586, lr_0 = 1.4818e-04
Loss = 3.4023e-02, PNorm = 71.0011, GNorm = 0.5518, lr_0 = 1.4808e-04
Loss = 3.5535e-02, PNorm = 71.0041, GNorm = 0.5628, lr_0 = 1.4798e-04
Loss = 3.3609e-02, PNorm = 71.0066, GNorm = 0.3870, lr_0 = 1.4788e-04
Loss = 3.3607e-02, PNorm = 71.0088, GNorm = 0.5211, lr_0 = 1.4778e-04
Loss = 3.0592e-02, PNorm = 71.0106, GNorm = 0.4424, lr_0 = 1.4768e-04
Loss = 3.4027e-02, PNorm = 71.0128, GNorm = 0.5286, lr_0 = 1.4758e-04
Loss = 3.4493e-02, PNorm = 71.0149, GNorm = 0.4250, lr_0 = 1.4748e-04
Loss = 3.5361e-02, PNorm = 71.0179, GNorm = 0.4349, lr_0 = 1.4737e-04
Loss = 3.6400e-02, PNorm = 71.0206, GNorm = 0.5278, lr_0 = 1.4727e-04
Loss = 3.3715e-02, PNorm = 71.0215, GNorm = 0.3736, lr_0 = 1.4717e-04
Loss = 3.5632e-02, PNorm = 71.0239, GNorm = 0.3316, lr_0 = 1.4707e-04
Loss = 2.8301e-02, PNorm = 71.0264, GNorm = 0.4041, lr_0 = 1.4697e-04
Loss = 3.5246e-02, PNorm = 71.0280, GNorm = 0.5167, lr_0 = 1.4687e-04
Loss = 3.2685e-02, PNorm = 71.0297, GNorm = 0.5500, lr_0 = 1.4677e-04
Loss = 3.6111e-02, PNorm = 71.0321, GNorm = 0.4996, lr_0 = 1.4667e-04
Loss = 3.7027e-02, PNorm = 71.0343, GNorm = 0.4936, lr_0 = 1.4657e-04
Loss = 3.8044e-02, PNorm = 71.0368, GNorm = 0.4328, lr_0 = 1.4647e-04
Loss = 3.5664e-02, PNorm = 71.0391, GNorm = 0.4432, lr_0 = 1.4637e-04
Loss = 4.2930e-02, PNorm = 71.0425, GNorm = 0.4647, lr_0 = 1.4627e-04
Loss = 4.3221e-02, PNorm = 71.0455, GNorm = 0.6531, lr_0 = 1.4617e-04
Loss = 3.5039e-02, PNorm = 71.0487, GNorm = 0.5253, lr_0 = 1.4607e-04
Loss = 3.8023e-02, PNorm = 71.0516, GNorm = 0.5868, lr_0 = 1.4597e-04
Loss = 3.6279e-02, PNorm = 71.0538, GNorm = 0.7907, lr_0 = 1.4587e-04
Loss = 3.0861e-02, PNorm = 71.0557, GNorm = 0.5095, lr_0 = 1.4577e-04
Loss = 3.7528e-02, PNorm = 71.0583, GNorm = 0.6053, lr_0 = 1.4567e-04
Loss = 3.3357e-02, PNorm = 71.0613, GNorm = 0.4764, lr_0 = 1.4557e-04
Loss = 3.3246e-02, PNorm = 71.0641, GNorm = 0.6965, lr_0 = 1.4547e-04
Loss = 3.9260e-02, PNorm = 71.0666, GNorm = 0.4951, lr_0 = 1.4537e-04
Loss = 3.7846e-02, PNorm = 71.0686, GNorm = 0.5179, lr_0 = 1.4527e-04
Loss = 3.7668e-02, PNorm = 71.0707, GNorm = 0.4165, lr_0 = 1.4517e-04
Loss = 3.9927e-02, PNorm = 71.0738, GNorm = 0.3871, lr_0 = 1.4507e-04
Loss = 3.0680e-02, PNorm = 71.0763, GNorm = 0.6371, lr_0 = 1.4497e-04
Loss = 3.6798e-02, PNorm = 71.0775, GNorm = 0.4642, lr_0 = 1.4487e-04
Loss = 4.0152e-02, PNorm = 71.0782, GNorm = 0.7911, lr_0 = 1.4477e-04
Loss = 4.1529e-02, PNorm = 71.0798, GNorm = 0.4017, lr_0 = 1.4467e-04
Loss = 4.3670e-02, PNorm = 71.0824, GNorm = 0.5394, lr_0 = 1.4457e-04
Loss = 4.2270e-02, PNorm = 71.0856, GNorm = 0.7614, lr_0 = 1.4447e-04
Loss = 3.5405e-02, PNorm = 71.0867, GNorm = 0.4127, lr_0 = 1.4438e-04
Loss = 3.7935e-02, PNorm = 71.0884, GNorm = 0.4626, lr_0 = 1.4428e-04
Loss = 3.2910e-02, PNorm = 71.0918, GNorm = 0.6597, lr_0 = 1.4418e-04
Loss = 2.9176e-02, PNorm = 71.0946, GNorm = 0.3715, lr_0 = 1.4408e-04
Loss = 3.8815e-02, PNorm = 71.0953, GNorm = 0.5482, lr_0 = 1.4398e-04
Loss = 3.8255e-02, PNorm = 71.0962, GNorm = 0.3601, lr_0 = 1.4388e-04
Loss = 4.3895e-02, PNorm = 71.0985, GNorm = 0.6341, lr_0 = 1.4378e-04
Loss = 3.5714e-02, PNorm = 71.0994, GNorm = 0.4680, lr_0 = 1.4368e-04
Loss = 3.9761e-02, PNorm = 71.1006, GNorm = 0.5647, lr_0 = 1.4359e-04
Loss = 3.1995e-02, PNorm = 71.1028, GNorm = 0.4577, lr_0 = 1.4349e-04
Loss = 3.1601e-02, PNorm = 71.1046, GNorm = 0.4298, lr_0 = 1.4339e-04
Loss = 3.1887e-02, PNorm = 71.1062, GNorm = 0.4249, lr_0 = 1.4329e-04
Loss = 3.7820e-02, PNorm = 71.1081, GNorm = 0.5357, lr_0 = 1.4319e-04
Loss = 3.0159e-02, PNorm = 71.1097, GNorm = 0.4400, lr_0 = 1.4310e-04
Loss = 4.0417e-02, PNorm = 71.1111, GNorm = 0.4642, lr_0 = 1.4300e-04
Loss = 2.9554e-02, PNorm = 71.1126, GNorm = 0.5669, lr_0 = 1.4290e-04
Loss = 4.5813e-02, PNorm = 71.1151, GNorm = 0.4297, lr_0 = 1.4280e-04
Loss = 4.0560e-02, PNorm = 71.1184, GNorm = 0.5425, lr_0 = 1.4270e-04
Loss = 3.6782e-02, PNorm = 71.1222, GNorm = 0.6147, lr_0 = 1.4261e-04
Loss = 3.7229e-02, PNorm = 71.1234, GNorm = 0.5184, lr_0 = 1.4251e-04
Loss = 3.3093e-02, PNorm = 71.1243, GNorm = 0.6061, lr_0 = 1.4241e-04
Loss = 3.9568e-02, PNorm = 71.1248, GNorm = 0.4138, lr_0 = 1.4231e-04
Loss = 3.8723e-02, PNorm = 71.1256, GNorm = 0.4841, lr_0 = 1.4222e-04
Loss = 3.9133e-02, PNorm = 71.1272, GNorm = 0.5789, lr_0 = 1.4212e-04
Loss = 3.6459e-02, PNorm = 71.1292, GNorm = 0.5980, lr_0 = 1.4202e-04
Loss = 3.7775e-02, PNorm = 71.1307, GNorm = 0.4647, lr_0 = 1.4192e-04
Loss = 3.2244e-02, PNorm = 71.1324, GNorm = 0.4926, lr_0 = 1.4183e-04
Loss = 4.1946e-02, PNorm = 71.1337, GNorm = 0.5729, lr_0 = 1.4173e-04
Loss = 3.4992e-02, PNorm = 71.1356, GNorm = 0.5496, lr_0 = 1.4163e-04
Loss = 3.3392e-02, PNorm = 71.1371, GNorm = 0.4672, lr_0 = 1.4153e-04
Loss = 4.2170e-02, PNorm = 71.1392, GNorm = 0.5750, lr_0 = 1.4144e-04
Loss = 3.4118e-02, PNorm = 71.1403, GNorm = 0.6119, lr_0 = 1.4134e-04
Loss = 3.2572e-02, PNorm = 71.1422, GNorm = 0.4937, lr_0 = 1.4124e-04
Loss = 3.4181e-02, PNorm = 71.1455, GNorm = 0.6436, lr_0 = 1.4115e-04
Loss = 3.7116e-02, PNorm = 71.1488, GNorm = 0.4014, lr_0 = 1.4105e-04
Loss = 3.5377e-02, PNorm = 71.1522, GNorm = 0.4403, lr_0 = 1.4095e-04
Loss = 3.3587e-02, PNorm = 71.1530, GNorm = 0.6567, lr_0 = 1.4086e-04
Loss = 3.3362e-02, PNorm = 71.1538, GNorm = 0.3706, lr_0 = 1.4076e-04
Loss = 3.9867e-02, PNorm = 71.1553, GNorm = 0.4489, lr_0 = 1.4066e-04
Loss = 3.7680e-02, PNorm = 71.1567, GNorm = 0.5038, lr_0 = 1.4057e-04
Loss = 4.1644e-02, PNorm = 71.1589, GNorm = 0.3929, lr_0 = 1.4047e-04
Loss = 4.0720e-02, PNorm = 71.1611, GNorm = 0.4683, lr_0 = 1.4038e-04
Loss = 3.5857e-02, PNorm = 71.1633, GNorm = 0.4110, lr_0 = 1.4028e-04
Loss = 3.8211e-02, PNorm = 71.1661, GNorm = 0.4686, lr_0 = 1.4018e-04
Loss = 3.4616e-02, PNorm = 71.1699, GNorm = 0.6077, lr_0 = 1.4009e-04
Loss = 3.6281e-02, PNorm = 71.1721, GNorm = 0.5295, lr_0 = 1.3999e-04
Loss = 3.1539e-02, PNorm = 71.1737, GNorm = 0.3213, lr_0 = 1.3990e-04
Loss = 3.6947e-02, PNorm = 71.1756, GNorm = 0.6206, lr_0 = 1.3980e-04
Loss = 3.9209e-02, PNorm = 71.1766, GNorm = 0.3922, lr_0 = 1.3970e-04
Loss = 3.9310e-02, PNorm = 71.1785, GNorm = 0.7448, lr_0 = 1.3961e-04
Loss = 3.4912e-02, PNorm = 71.1803, GNorm = 0.4627, lr_0 = 1.3951e-04
Loss = 3.5697e-02, PNorm = 71.1825, GNorm = 0.5353, lr_0 = 1.3942e-04
Loss = 3.2997e-02, PNorm = 71.1838, GNorm = 0.4314, lr_0 = 1.3932e-04
Loss = 4.0467e-02, PNorm = 71.1853, GNorm = 0.6149, lr_0 = 1.3923e-04
Loss = 3.5994e-02, PNorm = 71.1870, GNorm = 0.4912, lr_0 = 1.3913e-04
Loss = 3.7473e-02, PNorm = 71.1871, GNorm = 0.4001, lr_0 = 1.3904e-04
Loss = 3.8418e-02, PNorm = 71.1877, GNorm = 0.5139, lr_0 = 1.3894e-04
Validation mae = 0.388329
Epoch 26
Loss = 3.1349e-02, PNorm = 71.1905, GNorm = 0.4862, lr_0 = 1.3884e-04
Loss = 2.9854e-02, PNorm = 71.1934, GNorm = 0.5943, lr_0 = 1.3875e-04
Loss = 3.0743e-02, PNorm = 71.1961, GNorm = 0.4980, lr_0 = 1.3865e-04
Loss = 2.8797e-02, PNorm = 71.1984, GNorm = 0.4473, lr_0 = 1.3856e-04
Loss = 3.6012e-02, PNorm = 71.2001, GNorm = 0.5129, lr_0 = 1.3846e-04
Loss = 3.2702e-02, PNorm = 71.2028, GNorm = 0.5342, lr_0 = 1.3837e-04
Loss = 3.1805e-02, PNorm = 71.2055, GNorm = 0.4232, lr_0 = 1.3828e-04
Loss = 2.8731e-02, PNorm = 71.2075, GNorm = 0.4325, lr_0 = 1.3818e-04
Loss = 3.5782e-02, PNorm = 71.2095, GNorm = 0.6329, lr_0 = 1.3809e-04
Loss = 2.7249e-02, PNorm = 71.2108, GNorm = 0.4851, lr_0 = 1.3799e-04
Loss = 3.6154e-02, PNorm = 71.2122, GNorm = 0.6623, lr_0 = 1.3790e-04
Loss = 2.5246e-02, PNorm = 71.2144, GNorm = 0.4909, lr_0 = 1.3780e-04
Loss = 3.1712e-02, PNorm = 71.2158, GNorm = 0.5653, lr_0 = 1.3771e-04
Loss = 2.8242e-02, PNorm = 71.2177, GNorm = 0.4514, lr_0 = 1.3761e-04
Loss = 3.3078e-02, PNorm = 71.2208, GNorm = 0.3898, lr_0 = 1.3752e-04
Loss = 3.6298e-02, PNorm = 71.2238, GNorm = 0.5639, lr_0 = 1.3742e-04
Loss = 3.2000e-02, PNorm = 71.2265, GNorm = 0.4542, lr_0 = 1.3733e-04
Loss = 3.1728e-02, PNorm = 71.2287, GNorm = 0.4170, lr_0 = 1.3724e-04
Loss = 3.3668e-02, PNorm = 71.2304, GNorm = 0.3731, lr_0 = 1.3714e-04
Loss = 3.2730e-02, PNorm = 71.2328, GNorm = 0.5273, lr_0 = 1.3705e-04
Loss = 3.2758e-02, PNorm = 71.2343, GNorm = 0.6235, lr_0 = 1.3695e-04
Loss = 3.4369e-02, PNorm = 71.2359, GNorm = 0.4852, lr_0 = 1.3686e-04
Loss = 3.0536e-02, PNorm = 71.2372, GNorm = 0.5359, lr_0 = 1.3677e-04
Loss = 2.9326e-02, PNorm = 71.2384, GNorm = 0.3739, lr_0 = 1.3667e-04
Loss = 3.1785e-02, PNorm = 71.2415, GNorm = 0.4005, lr_0 = 1.3658e-04
Loss = 3.7625e-02, PNorm = 71.2440, GNorm = 0.4113, lr_0 = 1.3649e-04
Loss = 3.3677e-02, PNorm = 71.2468, GNorm = 0.5044, lr_0 = 1.3639e-04
Loss = 3.2371e-02, PNorm = 71.2500, GNorm = 0.4330, lr_0 = 1.3630e-04
Loss = 3.4350e-02, PNorm = 71.2528, GNorm = 0.4746, lr_0 = 1.3621e-04
Loss = 2.8534e-02, PNorm = 71.2558, GNorm = 0.5742, lr_0 = 1.3611e-04
Loss = 2.7176e-02, PNorm = 71.2590, GNorm = 0.3487, lr_0 = 1.3602e-04
Loss = 3.6003e-02, PNorm = 71.2610, GNorm = 0.6338, lr_0 = 1.3593e-04
Loss = 4.2178e-02, PNorm = 71.2630, GNorm = 0.4337, lr_0 = 1.3583e-04
Loss = 3.6001e-02, PNorm = 71.2659, GNorm = 0.5837, lr_0 = 1.3574e-04
Loss = 2.7281e-02, PNorm = 71.2684, GNorm = 0.4411, lr_0 = 1.3565e-04
Loss = 3.6968e-02, PNorm = 71.2714, GNorm = 0.5810, lr_0 = 1.3555e-04
Loss = 3.5742e-02, PNorm = 71.2731, GNorm = 0.4486, lr_0 = 1.3546e-04
Loss = 3.2160e-02, PNorm = 71.2745, GNorm = 0.4048, lr_0 = 1.3537e-04
Loss = 3.7690e-02, PNorm = 71.2762, GNorm = 0.5925, lr_0 = 1.3528e-04
Loss = 3.0422e-02, PNorm = 71.2784, GNorm = 0.3942, lr_0 = 1.3518e-04
Loss = 3.1819e-02, PNorm = 71.2790, GNorm = 0.4485, lr_0 = 1.3509e-04
Loss = 3.1602e-02, PNorm = 71.2808, GNorm = 0.5299, lr_0 = 1.3500e-04
Loss = 3.1500e-02, PNorm = 71.2831, GNorm = 0.5019, lr_0 = 1.3491e-04
Loss = 3.1896e-02, PNorm = 71.2847, GNorm = 0.4371, lr_0 = 1.3481e-04
Loss = 3.2534e-02, PNorm = 71.2862, GNorm = 0.4557, lr_0 = 1.3472e-04
Loss = 3.5230e-02, PNorm = 71.2877, GNorm = 0.5774, lr_0 = 1.3463e-04
Loss = 3.3417e-02, PNorm = 71.2896, GNorm = 0.3964, lr_0 = 1.3454e-04
Loss = 3.2352e-02, PNorm = 71.2919, GNorm = 0.3752, lr_0 = 1.3444e-04
Loss = 2.9189e-02, PNorm = 71.2937, GNorm = 0.4430, lr_0 = 1.3435e-04
Loss = 3.7048e-02, PNorm = 71.2949, GNorm = 0.9497, lr_0 = 1.3426e-04
Loss = 3.7021e-02, PNorm = 71.2970, GNorm = 0.4869, lr_0 = 1.3417e-04
Loss = 3.2894e-02, PNorm = 71.2983, GNorm = 0.6549, lr_0 = 1.3408e-04
Loss = 3.3205e-02, PNorm = 71.2994, GNorm = 0.4923, lr_0 = 1.3398e-04
Loss = 3.8416e-02, PNorm = 71.3014, GNorm = 0.4759, lr_0 = 1.3389e-04
Loss = 3.4475e-02, PNorm = 71.3033, GNorm = 0.4672, lr_0 = 1.3380e-04
Loss = 3.4574e-02, PNorm = 71.3044, GNorm = 0.6853, lr_0 = 1.3371e-04
Loss = 4.3112e-02, PNorm = 71.3057, GNorm = 0.6108, lr_0 = 1.3362e-04
Loss = 3.6412e-02, PNorm = 71.3070, GNorm = 0.8223, lr_0 = 1.3353e-04
Loss = 3.7863e-02, PNorm = 71.3092, GNorm = 0.5797, lr_0 = 1.3343e-04
Loss = 3.1107e-02, PNorm = 71.3109, GNorm = 0.4627, lr_0 = 1.3334e-04
Loss = 3.1928e-02, PNorm = 71.3134, GNorm = 0.4061, lr_0 = 1.3325e-04
Loss = 3.7311e-02, PNorm = 71.3162, GNorm = 0.6219, lr_0 = 1.3316e-04
Loss = 3.5134e-02, PNorm = 71.3178, GNorm = 0.4808, lr_0 = 1.3307e-04
Loss = 3.7526e-02, PNorm = 71.3194, GNorm = 0.3891, lr_0 = 1.3298e-04
Loss = 3.2654e-02, PNorm = 71.3206, GNorm = 0.5814, lr_0 = 1.3289e-04
Loss = 3.3893e-02, PNorm = 71.3215, GNorm = 0.4054, lr_0 = 1.3280e-04
Loss = 3.2802e-02, PNorm = 71.3230, GNorm = 0.4038, lr_0 = 1.3270e-04
Loss = 3.5014e-02, PNorm = 71.3254, GNorm = 0.6013, lr_0 = 1.3261e-04
Loss = 3.7266e-02, PNorm = 71.3274, GNorm = 0.4878, lr_0 = 1.3252e-04
Loss = 3.4438e-02, PNorm = 71.3286, GNorm = 0.4717, lr_0 = 1.3243e-04
Loss = 3.9303e-02, PNorm = 71.3298, GNorm = 0.6550, lr_0 = 1.3234e-04
Loss = 3.1548e-02, PNorm = 71.3315, GNorm = 0.6420, lr_0 = 1.3225e-04
Loss = 3.8382e-02, PNorm = 71.3331, GNorm = 0.7560, lr_0 = 1.3216e-04
Loss = 3.3388e-02, PNorm = 71.3350, GNorm = 0.4807, lr_0 = 1.3207e-04
Loss = 3.4996e-02, PNorm = 71.3362, GNorm = 0.5211, lr_0 = 1.3198e-04
Loss = 2.9504e-02, PNorm = 71.3375, GNorm = 0.4795, lr_0 = 1.3189e-04
Loss = 2.9593e-02, PNorm = 71.3397, GNorm = 0.4824, lr_0 = 1.3180e-04
Loss = 2.9106e-02, PNorm = 71.3423, GNorm = 0.3834, lr_0 = 1.3171e-04
Loss = 3.7951e-02, PNorm = 71.3442, GNorm = 0.5473, lr_0 = 1.3162e-04
Loss = 3.1679e-02, PNorm = 71.3461, GNorm = 0.5404, lr_0 = 1.3153e-04
Loss = 3.4709e-02, PNorm = 71.3473, GNorm = 0.5355, lr_0 = 1.3144e-04
Loss = 4.1252e-02, PNorm = 71.3495, GNorm = 0.4101, lr_0 = 1.3135e-04
Loss = 2.9777e-02, PNorm = 71.3512, GNorm = 0.4090, lr_0 = 1.3126e-04
Loss = 3.2871e-02, PNorm = 71.3533, GNorm = 0.5125, lr_0 = 1.3117e-04
Loss = 3.9107e-02, PNorm = 71.3551, GNorm = 0.4235, lr_0 = 1.3108e-04
Loss = 3.8364e-02, PNorm = 71.3561, GNorm = 0.5868, lr_0 = 1.3099e-04
Loss = 3.4670e-02, PNorm = 71.3576, GNorm = 0.4919, lr_0 = 1.3090e-04
Loss = 3.5908e-02, PNorm = 71.3587, GNorm = 0.4218, lr_0 = 1.3081e-04
Loss = 3.6927e-02, PNorm = 71.3589, GNorm = 0.6027, lr_0 = 1.3072e-04
Loss = 3.2351e-02, PNorm = 71.3605, GNorm = 0.5296, lr_0 = 1.3063e-04
Loss = 3.2252e-02, PNorm = 71.3620, GNorm = 0.5468, lr_0 = 1.3054e-04
Loss = 3.1739e-02, PNorm = 71.3636, GNorm = 0.4048, lr_0 = 1.3045e-04
Loss = 3.2054e-02, PNorm = 71.3648, GNorm = 0.4901, lr_0 = 1.3036e-04
Loss = 4.2763e-02, PNorm = 71.3673, GNorm = 0.6965, lr_0 = 1.3027e-04
Loss = 3.0888e-02, PNorm = 71.3700, GNorm = 0.4133, lr_0 = 1.3018e-04
Loss = 3.1439e-02, PNorm = 71.3727, GNorm = 0.3049, lr_0 = 1.3009e-04
Loss = 3.5710e-02, PNorm = 71.3748, GNorm = 0.6946, lr_0 = 1.3000e-04
Loss = 3.7565e-02, PNorm = 71.3758, GNorm = 0.7777, lr_0 = 1.2992e-04
Loss = 3.9502e-02, PNorm = 71.3766, GNorm = 0.4044, lr_0 = 1.2983e-04
Loss = 3.5052e-02, PNorm = 71.3798, GNorm = 0.7771, lr_0 = 1.2974e-04
Loss = 3.5160e-02, PNorm = 71.3814, GNorm = 0.5167, lr_0 = 1.2965e-04
Loss = 3.4738e-02, PNorm = 71.3832, GNorm = 0.7672, lr_0 = 1.2956e-04
Loss = 3.5788e-02, PNorm = 71.3854, GNorm = 0.4055, lr_0 = 1.2947e-04
Loss = 3.9732e-02, PNorm = 71.3863, GNorm = 0.3832, lr_0 = 1.2938e-04
Loss = 3.5518e-02, PNorm = 71.3878, GNorm = 0.4846, lr_0 = 1.2929e-04
Loss = 3.2682e-02, PNorm = 71.3900, GNorm = 0.4835, lr_0 = 1.2921e-04
Loss = 3.8172e-02, PNorm = 71.3923, GNorm = 0.4882, lr_0 = 1.2912e-04
Loss = 3.5136e-02, PNorm = 71.3947, GNorm = 0.5189, lr_0 = 1.2903e-04
Loss = 3.9333e-02, PNorm = 71.3964, GNorm = 0.4945, lr_0 = 1.2894e-04
Loss = 3.5079e-02, PNorm = 71.3985, GNorm = 0.6580, lr_0 = 1.2885e-04
Loss = 3.3257e-02, PNorm = 71.3994, GNorm = 0.4886, lr_0 = 1.2876e-04
Loss = 3.4444e-02, PNorm = 71.4006, GNorm = 0.5849, lr_0 = 1.2867e-04
Loss = 3.3534e-02, PNorm = 71.4024, GNorm = 0.4653, lr_0 = 1.2859e-04
Loss = 3.1116e-02, PNorm = 71.4050, GNorm = 0.3932, lr_0 = 1.2850e-04
Loss = 3.6685e-02, PNorm = 71.4068, GNorm = 0.4348, lr_0 = 1.2841e-04
Loss = 3.3104e-02, PNorm = 71.4080, GNorm = 0.4428, lr_0 = 1.2832e-04
Loss = 3.5911e-02, PNorm = 71.4092, GNorm = 0.4868, lr_0 = 1.2823e-04
Loss = 3.1970e-02, PNorm = 71.4111, GNorm = 0.4445, lr_0 = 1.2815e-04
Loss = 2.8221e-02, PNorm = 71.4139, GNorm = 0.4056, lr_0 = 1.2806e-04
Loss = 3.2508e-02, PNorm = 71.4160, GNorm = 0.5450, lr_0 = 1.2797e-04
Validation mae = 0.392735
Epoch 27
Loss = 2.9078e-02, PNorm = 71.4176, GNorm = 0.3859, lr_0 = 1.2788e-04
Loss = 3.4564e-02, PNorm = 71.4198, GNorm = 0.4280, lr_0 = 1.2780e-04
Loss = 2.6893e-02, PNorm = 71.4218, GNorm = 0.4753, lr_0 = 1.2771e-04
Loss = 2.9701e-02, PNorm = 71.4238, GNorm = 0.3809, lr_0 = 1.2762e-04
Loss = 2.5885e-02, PNorm = 71.4255, GNorm = 0.4369, lr_0 = 1.2753e-04
Loss = 2.7996e-02, PNorm = 71.4270, GNorm = 0.4254, lr_0 = 1.2745e-04
Loss = 3.4086e-02, PNorm = 71.4293, GNorm = 0.4270, lr_0 = 1.2736e-04
Loss = 2.7390e-02, PNorm = 71.4322, GNorm = 0.2991, lr_0 = 1.2727e-04
Loss = 3.0074e-02, PNorm = 71.4339, GNorm = 0.4241, lr_0 = 1.2718e-04
Loss = 3.0166e-02, PNorm = 71.4353, GNorm = 0.6056, lr_0 = 1.2710e-04
Loss = 3.2117e-02, PNorm = 71.4364, GNorm = 0.5051, lr_0 = 1.2701e-04
Loss = 2.6378e-02, PNorm = 71.4372, GNorm = 0.5275, lr_0 = 1.2692e-04
Loss = 3.1012e-02, PNorm = 71.4387, GNorm = 0.4322, lr_0 = 1.2684e-04
Loss = 2.7016e-02, PNorm = 71.4401, GNorm = 0.4548, lr_0 = 1.2675e-04
Loss = 2.9171e-02, PNorm = 71.4413, GNorm = 0.4845, lr_0 = 1.2666e-04
Loss = 3.6647e-02, PNorm = 71.4427, GNorm = 0.5338, lr_0 = 1.2658e-04
Loss = 2.9189e-02, PNorm = 71.4440, GNorm = 0.5151, lr_0 = 1.2649e-04
Loss = 3.0600e-02, PNorm = 71.4461, GNorm = 0.6350, lr_0 = 1.2640e-04
Loss = 3.0596e-02, PNorm = 71.4488, GNorm = 0.4355, lr_0 = 1.2632e-04
Loss = 3.1299e-02, PNorm = 71.4512, GNorm = 0.3983, lr_0 = 1.2623e-04
Loss = 2.9097e-02, PNorm = 71.4528, GNorm = 0.4145, lr_0 = 1.2614e-04
Loss = 2.7146e-02, PNorm = 71.4547, GNorm = 0.5340, lr_0 = 1.2606e-04
Loss = 2.8146e-02, PNorm = 71.4575, GNorm = 0.5229, lr_0 = 1.2597e-04
Loss = 3.7622e-02, PNorm = 71.4587, GNorm = 0.4773, lr_0 = 1.2588e-04
Loss = 3.3788e-02, PNorm = 71.4588, GNorm = 0.4958, lr_0 = 1.2580e-04
Loss = 3.1431e-02, PNorm = 71.4612, GNorm = 0.4185, lr_0 = 1.2571e-04
Loss = 3.0059e-02, PNorm = 71.4632, GNorm = 0.5039, lr_0 = 1.2563e-04
Loss = 3.0470e-02, PNorm = 71.4650, GNorm = 0.5497, lr_0 = 1.2554e-04
Loss = 3.4862e-02, PNorm = 71.4673, GNorm = 0.4854, lr_0 = 1.2545e-04
Loss = 3.3220e-02, PNorm = 71.4697, GNorm = 0.5418, lr_0 = 1.2537e-04
Loss = 2.7743e-02, PNorm = 71.4718, GNorm = 0.5100, lr_0 = 1.2528e-04
Loss = 3.0897e-02, PNorm = 71.4734, GNorm = 0.3558, lr_0 = 1.2520e-04
Loss = 2.7527e-02, PNorm = 71.4759, GNorm = 0.5255, lr_0 = 1.2511e-04
Loss = 3.4441e-02, PNorm = 71.4773, GNorm = 0.6650, lr_0 = 1.2502e-04
Loss = 3.2233e-02, PNorm = 71.4788, GNorm = 0.4356, lr_0 = 1.2494e-04
Loss = 3.0138e-02, PNorm = 71.4804, GNorm = 0.4551, lr_0 = 1.2485e-04
Loss = 3.3758e-02, PNorm = 71.4821, GNorm = 0.3769, lr_0 = 1.2477e-04
Loss = 3.7445e-02, PNorm = 71.4835, GNorm = 0.5611, lr_0 = 1.2468e-04
Loss = 3.5277e-02, PNorm = 71.4858, GNorm = 0.6303, lr_0 = 1.2460e-04
Loss = 2.7730e-02, PNorm = 71.4881, GNorm = 0.3966, lr_0 = 1.2451e-04
Loss = 3.2421e-02, PNorm = 71.4905, GNorm = 0.4364, lr_0 = 1.2443e-04
Loss = 3.3936e-02, PNorm = 71.4927, GNorm = 0.4522, lr_0 = 1.2434e-04
Loss = 2.6165e-02, PNorm = 71.4941, GNorm = 0.4261, lr_0 = 1.2426e-04
Loss = 2.9604e-02, PNorm = 71.4951, GNorm = 0.5714, lr_0 = 1.2417e-04
Loss = 3.1269e-02, PNorm = 71.4960, GNorm = 0.7583, lr_0 = 1.2409e-04
Loss = 3.2558e-02, PNorm = 71.4971, GNorm = 0.4425, lr_0 = 1.2400e-04
Loss = 3.2219e-02, PNorm = 71.4985, GNorm = 0.5740, lr_0 = 1.2392e-04
Loss = 3.1331e-02, PNorm = 71.5009, GNorm = 0.3474, lr_0 = 1.2383e-04
Loss = 3.2254e-02, PNorm = 71.5033, GNorm = 0.7448, lr_0 = 1.2375e-04
Loss = 3.0693e-02, PNorm = 71.5050, GNorm = 0.4831, lr_0 = 1.2366e-04
Loss = 3.4176e-02, PNorm = 71.5063, GNorm = 0.5753, lr_0 = 1.2358e-04
Loss = 3.3526e-02, PNorm = 71.5068, GNorm = 0.3712, lr_0 = 1.2349e-04
Loss = 3.9262e-02, PNorm = 71.5087, GNorm = 0.6664, lr_0 = 1.2341e-04
Loss = 3.0051e-02, PNorm = 71.5110, GNorm = 0.5000, lr_0 = 1.2332e-04
Loss = 2.9359e-02, PNorm = 71.5129, GNorm = 0.6501, lr_0 = 1.2324e-04
Loss = 3.3076e-02, PNorm = 71.5145, GNorm = 0.4712, lr_0 = 1.2315e-04
Loss = 2.9284e-02, PNorm = 71.5158, GNorm = 0.4136, lr_0 = 1.2307e-04
Loss = 3.5711e-02, PNorm = 71.5165, GNorm = 0.4212, lr_0 = 1.2298e-04
Loss = 2.9515e-02, PNorm = 71.5179, GNorm = 0.3644, lr_0 = 1.2290e-04
Loss = 3.5884e-02, PNorm = 71.5188, GNorm = 0.6341, lr_0 = 1.2282e-04
Loss = 3.5235e-02, PNorm = 71.5211, GNorm = 0.4557, lr_0 = 1.2273e-04
Loss = 3.0993e-02, PNorm = 71.5238, GNorm = 0.5263, lr_0 = 1.2265e-04
Loss = 3.4084e-02, PNorm = 71.5263, GNorm = 0.4375, lr_0 = 1.2256e-04
Loss = 3.7857e-02, PNorm = 71.5280, GNorm = 0.8049, lr_0 = 1.2248e-04
Loss = 3.5924e-02, PNorm = 71.5307, GNorm = 0.4118, lr_0 = 1.2240e-04
Loss = 3.3576e-02, PNorm = 71.5337, GNorm = 0.4187, lr_0 = 1.2231e-04
Loss = 3.3000e-02, PNorm = 71.5358, GNorm = 0.3874, lr_0 = 1.2223e-04
Loss = 3.1081e-02, PNorm = 71.5377, GNorm = 0.6264, lr_0 = 1.2214e-04
Loss = 3.3505e-02, PNorm = 71.5391, GNorm = 0.4877, lr_0 = 1.2206e-04
Loss = 3.1159e-02, PNorm = 71.5404, GNorm = 0.5022, lr_0 = 1.2198e-04
Loss = 3.3244e-02, PNorm = 71.5409, GNorm = 0.3714, lr_0 = 1.2189e-04
Loss = 3.4931e-02, PNorm = 71.5429, GNorm = 0.5065, lr_0 = 1.2181e-04
Loss = 3.0550e-02, PNorm = 71.5453, GNorm = 0.7196, lr_0 = 1.2173e-04
Loss = 3.8249e-02, PNorm = 71.5478, GNorm = 0.7921, lr_0 = 1.2164e-04
Loss = 3.4212e-02, PNorm = 71.5489, GNorm = 0.3203, lr_0 = 1.2156e-04
Loss = 2.9881e-02, PNorm = 71.5512, GNorm = 0.4941, lr_0 = 1.2148e-04
Loss = 3.0492e-02, PNorm = 71.5545, GNorm = 0.5712, lr_0 = 1.2139e-04
Loss = 3.8745e-02, PNorm = 71.5570, GNorm = 0.5209, lr_0 = 1.2131e-04
Loss = 3.3897e-02, PNorm = 71.5586, GNorm = 0.4465, lr_0 = 1.2123e-04
Loss = 3.1875e-02, PNorm = 71.5596, GNorm = 0.4789, lr_0 = 1.2114e-04
Loss = 2.9475e-02, PNorm = 71.5609, GNorm = 0.4942, lr_0 = 1.2106e-04
Loss = 3.5398e-02, PNorm = 71.5625, GNorm = 0.4331, lr_0 = 1.2098e-04
Loss = 3.5527e-02, PNorm = 71.5649, GNorm = 0.6457, lr_0 = 1.2090e-04
Loss = 2.9323e-02, PNorm = 71.5672, GNorm = 0.4555, lr_0 = 1.2081e-04
Loss = 3.5376e-02, PNorm = 71.5691, GNorm = 0.4415, lr_0 = 1.2073e-04
Loss = 3.0957e-02, PNorm = 71.5703, GNorm = 0.4133, lr_0 = 1.2065e-04
Loss = 3.4695e-02, PNorm = 71.5721, GNorm = 0.4613, lr_0 = 1.2056e-04
Loss = 2.9640e-02, PNorm = 71.5742, GNorm = 0.3569, lr_0 = 1.2048e-04
Loss = 3.0506e-02, PNorm = 71.5760, GNorm = 0.4804, lr_0 = 1.2040e-04
Loss = 3.5628e-02, PNorm = 71.5781, GNorm = 0.5702, lr_0 = 1.2032e-04
Loss = 3.4018e-02, PNorm = 71.5793, GNorm = 0.4922, lr_0 = 1.2023e-04
Loss = 3.3669e-02, PNorm = 71.5798, GNorm = 0.6677, lr_0 = 1.2015e-04
Loss = 3.1211e-02, PNorm = 71.5813, GNorm = 0.3681, lr_0 = 1.2007e-04
Loss = 3.4105e-02, PNorm = 71.5822, GNorm = 0.6505, lr_0 = 1.1999e-04
Loss = 3.3133e-02, PNorm = 71.5834, GNorm = 0.4256, lr_0 = 1.1991e-04
Loss = 3.2981e-02, PNorm = 71.5859, GNorm = 0.4323, lr_0 = 1.1982e-04
Loss = 4.1148e-02, PNorm = 71.5882, GNorm = 0.4996, lr_0 = 1.1974e-04
Loss = 3.5590e-02, PNorm = 71.5901, GNorm = 0.4544, lr_0 = 1.1966e-04
Loss = 3.5333e-02, PNorm = 71.5912, GNorm = 0.5532, lr_0 = 1.1958e-04
Loss = 2.9126e-02, PNorm = 71.5918, GNorm = 0.5621, lr_0 = 1.1950e-04
Loss = 3.3737e-02, PNorm = 71.5944, GNorm = 0.3637, lr_0 = 1.1941e-04
Loss = 2.9058e-02, PNorm = 71.5969, GNorm = 0.3870, lr_0 = 1.1933e-04
Loss = 3.1970e-02, PNorm = 71.5983, GNorm = 0.4235, lr_0 = 1.1925e-04
Loss = 3.6769e-02, PNorm = 71.5995, GNorm = 0.4857, lr_0 = 1.1917e-04
Loss = 3.2605e-02, PNorm = 71.6016, GNorm = 0.6316, lr_0 = 1.1909e-04
Loss = 2.6899e-02, PNorm = 71.6031, GNorm = 0.3852, lr_0 = 1.1901e-04
Loss = 3.7128e-02, PNorm = 71.6049, GNorm = 0.4301, lr_0 = 1.1892e-04
Loss = 3.5980e-02, PNorm = 71.6068, GNorm = 0.6278, lr_0 = 1.1884e-04
Loss = 2.9087e-02, PNorm = 71.6083, GNorm = 0.4012, lr_0 = 1.1876e-04
Loss = 3.2520e-02, PNorm = 71.6097, GNorm = 0.4513, lr_0 = 1.1868e-04
Loss = 3.4685e-02, PNorm = 71.6102, GNorm = 0.3588, lr_0 = 1.1860e-04
Loss = 3.1961e-02, PNorm = 71.6111, GNorm = 0.4735, lr_0 = 1.1852e-04
Loss = 3.6526e-02, PNorm = 71.6121, GNorm = 0.6711, lr_0 = 1.1844e-04
Loss = 3.9513e-02, PNorm = 71.6142, GNorm = 0.6767, lr_0 = 1.1835e-04
Loss = 3.7916e-02, PNorm = 71.6161, GNorm = 0.5081, lr_0 = 1.1827e-04
Loss = 3.3942e-02, PNorm = 71.6180, GNorm = 0.4504, lr_0 = 1.1819e-04
Loss = 2.9382e-02, PNorm = 71.6189, GNorm = 0.4758, lr_0 = 1.1811e-04
Loss = 3.2044e-02, PNorm = 71.6198, GNorm = 0.4095, lr_0 = 1.1803e-04
Loss = 3.2584e-02, PNorm = 71.6214, GNorm = 0.4601, lr_0 = 1.1795e-04
Loss = 3.5897e-02, PNorm = 71.6231, GNorm = 0.7178, lr_0 = 1.1787e-04
Validation mae = 0.391587
Epoch 28
Loss = 2.9390e-02, PNorm = 71.6253, GNorm = 0.6707, lr_0 = 1.1779e-04
Loss = 2.5687e-02, PNorm = 71.6274, GNorm = 0.5895, lr_0 = 1.1771e-04
Loss = 2.7998e-02, PNorm = 71.6289, GNorm = 0.6936, lr_0 = 1.1763e-04
Loss = 3.2688e-02, PNorm = 71.6304, GNorm = 0.3693, lr_0 = 1.1755e-04
Loss = 3.0616e-02, PNorm = 71.6321, GNorm = 0.6266, lr_0 = 1.1747e-04
Loss = 3.3924e-02, PNorm = 71.6333, GNorm = 0.7180, lr_0 = 1.1739e-04
Loss = 2.9148e-02, PNorm = 71.6347, GNorm = 0.7198, lr_0 = 1.1730e-04
Loss = 3.7700e-02, PNorm = 71.6369, GNorm = 0.7583, lr_0 = 1.1722e-04
Loss = 2.9541e-02, PNorm = 71.6386, GNorm = 0.4473, lr_0 = 1.1714e-04
Loss = 2.5813e-02, PNorm = 71.6398, GNorm = 0.4792, lr_0 = 1.1706e-04
Loss = 3.3792e-02, PNorm = 71.6404, GNorm = 0.4851, lr_0 = 1.1698e-04
Loss = 3.3193e-02, PNorm = 71.6419, GNorm = 0.7932, lr_0 = 1.1690e-04
Loss = 2.4726e-02, PNorm = 71.6435, GNorm = 0.3680, lr_0 = 1.1682e-04
Loss = 2.9029e-02, PNorm = 71.6448, GNorm = 0.3490, lr_0 = 1.1674e-04
Loss = 2.8733e-02, PNorm = 71.6457, GNorm = 0.6827, lr_0 = 1.1666e-04
Loss = 2.9716e-02, PNorm = 71.6467, GNorm = 0.5136, lr_0 = 1.1658e-04
Loss = 2.7651e-02, PNorm = 71.6472, GNorm = 0.3008, lr_0 = 1.1650e-04
Loss = 3.0157e-02, PNorm = 71.6488, GNorm = 0.4745, lr_0 = 1.1642e-04
Loss = 2.5270e-02, PNorm = 71.6510, GNorm = 0.6850, lr_0 = 1.1634e-04
Loss = 3.4974e-02, PNorm = 71.6527, GNorm = 0.6452, lr_0 = 1.1626e-04
Loss = 3.0442e-02, PNorm = 71.6550, GNorm = 0.5527, lr_0 = 1.1618e-04
Loss = 2.8286e-02, PNorm = 71.6571, GNorm = 0.5111, lr_0 = 1.1611e-04
Loss = 3.5242e-02, PNorm = 71.6593, GNorm = 0.4804, lr_0 = 1.1603e-04
Loss = 3.3018e-02, PNorm = 71.6613, GNorm = 0.4085, lr_0 = 1.1595e-04
Loss = 3.0595e-02, PNorm = 71.6625, GNorm = 0.5317, lr_0 = 1.1587e-04
Loss = 2.6541e-02, PNorm = 71.6635, GNorm = 0.3821, lr_0 = 1.1579e-04
Loss = 3.9155e-02, PNorm = 71.6648, GNorm = 0.4684, lr_0 = 1.1571e-04
Loss = 2.9373e-02, PNorm = 71.6669, GNorm = 0.5210, lr_0 = 1.1563e-04
Loss = 3.0728e-02, PNorm = 71.6694, GNorm = 0.4882, lr_0 = 1.1555e-04
Loss = 3.1571e-02, PNorm = 71.6729, GNorm = 0.4026, lr_0 = 1.1547e-04
Loss = 2.9263e-02, PNorm = 71.6753, GNorm = 0.7639, lr_0 = 1.1539e-04
Loss = 2.7739e-02, PNorm = 71.6763, GNorm = 0.4218, lr_0 = 1.1531e-04
Loss = 2.8645e-02, PNorm = 71.6769, GNorm = 0.5094, lr_0 = 1.1523e-04
Loss = 2.7913e-02, PNorm = 71.6791, GNorm = 0.3825, lr_0 = 1.1515e-04
Loss = 2.8577e-02, PNorm = 71.6805, GNorm = 0.5501, lr_0 = 1.1508e-04
Loss = 3.1975e-02, PNorm = 71.6819, GNorm = 0.3835, lr_0 = 1.1500e-04
Loss = 3.0932e-02, PNorm = 71.6830, GNorm = 0.5882, lr_0 = 1.1492e-04
Loss = 3.1402e-02, PNorm = 71.6839, GNorm = 0.4021, lr_0 = 1.1484e-04
Loss = 2.7671e-02, PNorm = 71.6853, GNorm = 0.4920, lr_0 = 1.1476e-04
Loss = 2.8318e-02, PNorm = 71.6856, GNorm = 0.4062, lr_0 = 1.1468e-04
Loss = 2.9354e-02, PNorm = 71.6867, GNorm = 0.3668, lr_0 = 1.1460e-04
Loss = 2.9574e-02, PNorm = 71.6880, GNorm = 0.5373, lr_0 = 1.1452e-04
Loss = 2.8837e-02, PNorm = 71.6891, GNorm = 0.4995, lr_0 = 1.1445e-04
Loss = 2.9673e-02, PNorm = 71.6905, GNorm = 0.5027, lr_0 = 1.1437e-04
Loss = 2.5891e-02, PNorm = 71.6923, GNorm = 0.5062, lr_0 = 1.1429e-04
Loss = 3.0351e-02, PNorm = 71.6935, GNorm = 0.5391, lr_0 = 1.1421e-04
Loss = 3.1547e-02, PNorm = 71.6945, GNorm = 0.4366, lr_0 = 1.1413e-04
Loss = 3.3039e-02, PNorm = 71.6964, GNorm = 0.6292, lr_0 = 1.1405e-04
Loss = 3.0002e-02, PNorm = 71.6993, GNorm = 0.5943, lr_0 = 1.1398e-04
Loss = 3.0840e-02, PNorm = 71.7011, GNorm = 0.6044, lr_0 = 1.1390e-04
Loss = 3.4220e-02, PNorm = 71.7024, GNorm = 0.6494, lr_0 = 1.1382e-04
Loss = 3.1808e-02, PNorm = 71.7049, GNorm = 0.4256, lr_0 = 1.1374e-04
Loss = 3.0871e-02, PNorm = 71.7074, GNorm = 0.5739, lr_0 = 1.1366e-04
Loss = 2.8728e-02, PNorm = 71.7085, GNorm = 0.4347, lr_0 = 1.1359e-04
Loss = 3.2038e-02, PNorm = 71.7090, GNorm = 0.3483, lr_0 = 1.1351e-04
Loss = 2.6499e-02, PNorm = 71.7104, GNorm = 0.4559, lr_0 = 1.1343e-04
Loss = 3.5294e-02, PNorm = 71.7127, GNorm = 0.5375, lr_0 = 1.1335e-04
Loss = 3.0185e-02, PNorm = 71.7147, GNorm = 0.4650, lr_0 = 1.1328e-04
Loss = 3.4488e-02, PNorm = 71.7165, GNorm = 0.4842, lr_0 = 1.1320e-04
Loss = 3.4430e-02, PNorm = 71.7179, GNorm = 0.4700, lr_0 = 1.1312e-04
Loss = 3.0534e-02, PNorm = 71.7189, GNorm = 0.9035, lr_0 = 1.1304e-04
Loss = 2.7760e-02, PNorm = 71.7208, GNorm = 0.5121, lr_0 = 1.1297e-04
Loss = 3.2372e-02, PNorm = 71.7237, GNorm = 0.5247, lr_0 = 1.1289e-04
Loss = 2.9199e-02, PNorm = 71.7254, GNorm = 0.4019, lr_0 = 1.1281e-04
Loss = 3.0689e-02, PNorm = 71.7260, GNorm = 0.3905, lr_0 = 1.1273e-04
Loss = 2.8945e-02, PNorm = 71.7273, GNorm = 0.5005, lr_0 = 1.1266e-04
Loss = 2.7620e-02, PNorm = 71.7290, GNorm = 0.4233, lr_0 = 1.1258e-04
Loss = 2.7959e-02, PNorm = 71.7311, GNorm = 0.4881, lr_0 = 1.1250e-04
Loss = 3.2780e-02, PNorm = 71.7334, GNorm = 0.3622, lr_0 = 1.1243e-04
Loss = 2.4741e-02, PNorm = 71.7351, GNorm = 0.3659, lr_0 = 1.1235e-04
Loss = 2.8100e-02, PNorm = 71.7368, GNorm = 0.4716, lr_0 = 1.1227e-04
Loss = 3.1503e-02, PNorm = 71.7391, GNorm = 0.5098, lr_0 = 1.1219e-04
Loss = 3.2767e-02, PNorm = 71.7404, GNorm = 0.4394, lr_0 = 1.1212e-04
Loss = 3.2217e-02, PNorm = 71.7417, GNorm = 0.4907, lr_0 = 1.1204e-04
Loss = 2.3872e-02, PNorm = 71.7424, GNorm = 0.3871, lr_0 = 1.1196e-04
Loss = 3.2270e-02, PNorm = 71.7438, GNorm = 0.5013, lr_0 = 1.1189e-04
Loss = 3.3190e-02, PNorm = 71.7458, GNorm = 0.4908, lr_0 = 1.1181e-04
Loss = 3.2652e-02, PNorm = 71.7472, GNorm = 0.2906, lr_0 = 1.1173e-04
Loss = 3.1855e-02, PNorm = 71.7491, GNorm = 0.5383, lr_0 = 1.1166e-04
Loss = 3.3980e-02, PNorm = 71.7517, GNorm = 0.5488, lr_0 = 1.1158e-04
Loss = 3.3572e-02, PNorm = 71.7541, GNorm = 0.3815, lr_0 = 1.1150e-04
Loss = 3.5862e-02, PNorm = 71.7556, GNorm = 0.6432, lr_0 = 1.1143e-04
Loss = 3.0447e-02, PNorm = 71.7575, GNorm = 0.8088, lr_0 = 1.1135e-04
Loss = 3.5670e-02, PNorm = 71.7595, GNorm = 0.7001, lr_0 = 1.1128e-04
Loss = 3.2444e-02, PNorm = 71.7609, GNorm = 0.5709, lr_0 = 1.1120e-04
Loss = 3.0199e-02, PNorm = 71.7626, GNorm = 0.4226, lr_0 = 1.1112e-04
Loss = 2.8696e-02, PNorm = 71.7646, GNorm = 0.3574, lr_0 = 1.1105e-04
Loss = 3.1092e-02, PNorm = 71.7659, GNorm = 0.5829, lr_0 = 1.1097e-04
Loss = 3.5131e-02, PNorm = 71.7665, GNorm = 0.3936, lr_0 = 1.1089e-04
Loss = 2.7148e-02, PNorm = 71.7679, GNorm = 0.4261, lr_0 = 1.1082e-04
Loss = 2.7647e-02, PNorm = 71.7691, GNorm = 0.4263, lr_0 = 1.1074e-04
Loss = 3.0044e-02, PNorm = 71.7699, GNorm = 0.5634, lr_0 = 1.1067e-04
Loss = 2.8961e-02, PNorm = 71.7707, GNorm = 0.3741, lr_0 = 1.1059e-04
Loss = 3.3266e-02, PNorm = 71.7716, GNorm = 0.5061, lr_0 = 1.1052e-04
Loss = 3.5819e-02, PNorm = 71.7724, GNorm = 0.4930, lr_0 = 1.1044e-04
Loss = 3.0245e-02, PNorm = 71.7738, GNorm = 0.4403, lr_0 = 1.1036e-04
Loss = 3.0449e-02, PNorm = 71.7753, GNorm = 0.3491, lr_0 = 1.1029e-04
Loss = 2.9882e-02, PNorm = 71.7752, GNorm = 0.4561, lr_0 = 1.1021e-04
Loss = 3.1350e-02, PNorm = 71.7763, GNorm = 0.3355, lr_0 = 1.1014e-04
Loss = 3.3663e-02, PNorm = 71.7770, GNorm = 0.4456, lr_0 = 1.1006e-04
Loss = 3.2491e-02, PNorm = 71.7788, GNorm = 0.4440, lr_0 = 1.0999e-04
Loss = 3.3387e-02, PNorm = 71.7806, GNorm = 0.3091, lr_0 = 1.0991e-04
Loss = 2.5476e-02, PNorm = 71.7816, GNorm = 0.3510, lr_0 = 1.0984e-04
Loss = 3.2394e-02, PNorm = 71.7831, GNorm = 0.4671, lr_0 = 1.0976e-04
Loss = 2.8158e-02, PNorm = 71.7844, GNorm = 0.3954, lr_0 = 1.0969e-04
Loss = 3.4451e-02, PNorm = 71.7865, GNorm = 0.4376, lr_0 = 1.0961e-04
Loss = 3.1482e-02, PNorm = 71.7879, GNorm = 0.5879, lr_0 = 1.0954e-04
Loss = 3.4147e-02, PNorm = 71.7890, GNorm = 0.4780, lr_0 = 1.0946e-04
Loss = 3.2115e-02, PNorm = 71.7901, GNorm = 0.4867, lr_0 = 1.0939e-04
Loss = 3.0650e-02, PNorm = 71.7909, GNorm = 0.3318, lr_0 = 1.0931e-04
Loss = 3.1315e-02, PNorm = 71.7909, GNorm = 0.4557, lr_0 = 1.0924e-04
Loss = 3.0436e-02, PNorm = 71.7923, GNorm = 0.4533, lr_0 = 1.0916e-04
Loss = 3.3049e-02, PNorm = 71.7940, GNorm = 0.5289, lr_0 = 1.0909e-04
Loss = 2.9018e-02, PNorm = 71.7961, GNorm = 0.4318, lr_0 = 1.0901e-04
Loss = 3.0759e-02, PNorm = 71.7984, GNorm = 0.5395, lr_0 = 1.0894e-04
Loss = 3.8012e-02, PNorm = 71.7995, GNorm = 0.6604, lr_0 = 1.0886e-04
Loss = 3.2425e-02, PNorm = 71.8000, GNorm = 0.5564, lr_0 = 1.0879e-04
Loss = 3.0064e-02, PNorm = 71.8021, GNorm = 0.5458, lr_0 = 1.0871e-04
Loss = 2.7975e-02, PNorm = 71.8039, GNorm = 0.3633, lr_0 = 1.0864e-04
Loss = 3.0736e-02, PNorm = 71.8059, GNorm = 0.5597, lr_0 = 1.0856e-04
Validation mae = 0.390787
Epoch 29
Loss = 2.9176e-02, PNorm = 71.8065, GNorm = 0.5546, lr_0 = 1.0849e-04
Loss = 2.9760e-02, PNorm = 71.8073, GNorm = 0.4411, lr_0 = 1.0841e-04
Loss = 2.4935e-02, PNorm = 71.8082, GNorm = 0.4577, lr_0 = 1.0834e-04
Loss = 3.4717e-02, PNorm = 71.8103, GNorm = 0.5470, lr_0 = 1.0827e-04
Loss = 2.6677e-02, PNorm = 71.8124, GNorm = 0.5739, lr_0 = 1.0819e-04
Loss = 3.2356e-02, PNorm = 71.8140, GNorm = 0.6757, lr_0 = 1.0812e-04
Loss = 2.4299e-02, PNorm = 71.8165, GNorm = 0.3991, lr_0 = 1.0804e-04
Loss = 2.8872e-02, PNorm = 71.8182, GNorm = 0.4746, lr_0 = 1.0797e-04
Loss = 2.6227e-02, PNorm = 71.8195, GNorm = 0.3982, lr_0 = 1.0790e-04
Loss = 2.3706e-02, PNorm = 71.8199, GNorm = 0.4914, lr_0 = 1.0782e-04
Loss = 2.5736e-02, PNorm = 71.8220, GNorm = 0.3607, lr_0 = 1.0775e-04
Loss = 2.4728e-02, PNorm = 71.8242, GNorm = 0.2844, lr_0 = 1.0767e-04
Loss = 2.6509e-02, PNorm = 71.8257, GNorm = 0.5450, lr_0 = 1.0760e-04
Loss = 2.9950e-02, PNorm = 71.8266, GNorm = 0.4943, lr_0 = 1.0753e-04
Loss = 2.6487e-02, PNorm = 71.8278, GNorm = 0.3884, lr_0 = 1.0745e-04
Loss = 2.5836e-02, PNorm = 71.8298, GNorm = 0.4013, lr_0 = 1.0738e-04
Loss = 3.2123e-02, PNorm = 71.8314, GNorm = 0.7939, lr_0 = 1.0731e-04
Loss = 2.8785e-02, PNorm = 71.8329, GNorm = 0.6369, lr_0 = 1.0723e-04
Loss = 3.3908e-02, PNorm = 71.8347, GNorm = 0.4851, lr_0 = 1.0716e-04
Loss = 2.4927e-02, PNorm = 71.8361, GNorm = 0.4474, lr_0 = 1.0709e-04
Loss = 2.3900e-02, PNorm = 71.8374, GNorm = 0.5529, lr_0 = 1.0701e-04
Loss = 3.1752e-02, PNorm = 71.8389, GNorm = 0.3877, lr_0 = 1.0694e-04
Loss = 3.2606e-02, PNorm = 71.8407, GNorm = 0.4660, lr_0 = 1.0687e-04
Loss = 2.8325e-02, PNorm = 71.8413, GNorm = 0.4371, lr_0 = 1.0679e-04
Loss = 2.7395e-02, PNorm = 71.8423, GNorm = 0.4380, lr_0 = 1.0672e-04
Loss = 2.5628e-02, PNorm = 71.8439, GNorm = 0.3684, lr_0 = 1.0665e-04
Loss = 2.6245e-02, PNorm = 71.8458, GNorm = 0.3646, lr_0 = 1.0657e-04
Loss = 2.9287e-02, PNorm = 71.8467, GNorm = 0.5588, lr_0 = 1.0650e-04
Loss = 2.6813e-02, PNorm = 71.8471, GNorm = 0.4695, lr_0 = 1.0643e-04
Loss = 2.6660e-02, PNorm = 71.8487, GNorm = 0.4253, lr_0 = 1.0635e-04
Loss = 2.9265e-02, PNorm = 71.8502, GNorm = 0.4810, lr_0 = 1.0628e-04
Loss = 3.0808e-02, PNorm = 71.8513, GNorm = 0.6040, lr_0 = 1.0621e-04
Loss = 2.6011e-02, PNorm = 71.8531, GNorm = 0.4310, lr_0 = 1.0614e-04
Loss = 2.7041e-02, PNorm = 71.8547, GNorm = 0.5505, lr_0 = 1.0606e-04
Loss = 3.0471e-02, PNorm = 71.8558, GNorm = 0.4325, lr_0 = 1.0599e-04
Loss = 2.4849e-02, PNorm = 71.8573, GNorm = 0.6099, lr_0 = 1.0592e-04
Loss = 2.4924e-02, PNorm = 71.8588, GNorm = 0.5098, lr_0 = 1.0585e-04
Loss = 2.5951e-02, PNorm = 71.8600, GNorm = 0.4016, lr_0 = 1.0577e-04
Loss = 2.7131e-02, PNorm = 71.8611, GNorm = 0.4421, lr_0 = 1.0570e-04
Loss = 2.9527e-02, PNorm = 71.8620, GNorm = 0.6067, lr_0 = 1.0563e-04
Loss = 3.6322e-02, PNorm = 71.8629, GNorm = 0.5212, lr_0 = 1.0556e-04
Loss = 3.0253e-02, PNorm = 71.8647, GNorm = 0.4663, lr_0 = 1.0548e-04
Loss = 3.3032e-02, PNorm = 71.8666, GNorm = 0.4691, lr_0 = 1.0541e-04
Loss = 3.5010e-02, PNorm = 71.8687, GNorm = 0.7308, lr_0 = 1.0534e-04
Loss = 3.5438e-02, PNorm = 71.8712, GNorm = 0.3721, lr_0 = 1.0527e-04
Loss = 2.7286e-02, PNorm = 71.8725, GNorm = 0.5536, lr_0 = 1.0519e-04
Loss = 3.1770e-02, PNorm = 71.8741, GNorm = 0.5043, lr_0 = 1.0512e-04
Loss = 2.6436e-02, PNorm = 71.8751, GNorm = 0.5597, lr_0 = 1.0505e-04
Loss = 3.1864e-02, PNorm = 71.8758, GNorm = 0.5243, lr_0 = 1.0498e-04
Loss = 2.8062e-02, PNorm = 71.8767, GNorm = 0.7117, lr_0 = 1.0491e-04
Loss = 2.9883e-02, PNorm = 71.8780, GNorm = 0.5164, lr_0 = 1.0483e-04
Loss = 2.7617e-02, PNorm = 71.8791, GNorm = 0.4199, lr_0 = 1.0476e-04
Loss = 3.0191e-02, PNorm = 71.8809, GNorm = 0.4812, lr_0 = 1.0469e-04
Loss = 2.8159e-02, PNorm = 71.8825, GNorm = 0.4107, lr_0 = 1.0462e-04
Loss = 2.9854e-02, PNorm = 71.8847, GNorm = 0.3924, lr_0 = 1.0455e-04
Loss = 3.0383e-02, PNorm = 71.8858, GNorm = 0.4753, lr_0 = 1.0448e-04
Loss = 3.3739e-02, PNorm = 71.8863, GNorm = 0.5078, lr_0 = 1.0440e-04
Loss = 3.0908e-02, PNorm = 71.8864, GNorm = 0.6809, lr_0 = 1.0433e-04
Loss = 2.5987e-02, PNorm = 71.8863, GNorm = 0.3995, lr_0 = 1.0426e-04
Loss = 2.9935e-02, PNorm = 71.8875, GNorm = 0.4803, lr_0 = 1.0419e-04
Loss = 2.1898e-02, PNorm = 71.8893, GNorm = 0.3537, lr_0 = 1.0412e-04
Loss = 2.4514e-02, PNorm = 71.8910, GNorm = 0.3825, lr_0 = 1.0405e-04
Loss = 2.8373e-02, PNorm = 71.8926, GNorm = 0.4626, lr_0 = 1.0398e-04
Loss = 2.9717e-02, PNorm = 71.8944, GNorm = 0.3163, lr_0 = 1.0391e-04
Loss = 2.6818e-02, PNorm = 71.8964, GNorm = 0.5074, lr_0 = 1.0383e-04
Loss = 3.2471e-02, PNorm = 71.8980, GNorm = 0.4798, lr_0 = 1.0376e-04
Loss = 3.2450e-02, PNorm = 71.8992, GNorm = 0.3932, lr_0 = 1.0369e-04
Loss = 3.1912e-02, PNorm = 71.9001, GNorm = 0.3159, lr_0 = 1.0362e-04
Loss = 3.2637e-02, PNorm = 71.9014, GNorm = 0.4564, lr_0 = 1.0355e-04
Loss = 2.8262e-02, PNorm = 71.9028, GNorm = 0.5836, lr_0 = 1.0348e-04
Loss = 2.8854e-02, PNorm = 71.9042, GNorm = 0.5011, lr_0 = 1.0341e-04
Loss = 2.8310e-02, PNorm = 71.9054, GNorm = 0.4089, lr_0 = 1.0334e-04
Loss = 2.7740e-02, PNorm = 71.9071, GNorm = 0.3862, lr_0 = 1.0327e-04
Loss = 2.8640e-02, PNorm = 71.9075, GNorm = 0.5089, lr_0 = 1.0320e-04
Loss = 2.7371e-02, PNorm = 71.9083, GNorm = 0.3425, lr_0 = 1.0312e-04
Loss = 2.8674e-02, PNorm = 71.9094, GNorm = 0.5513, lr_0 = 1.0305e-04
Loss = 2.7754e-02, PNorm = 71.9105, GNorm = 0.5051, lr_0 = 1.0298e-04
Loss = 2.8796e-02, PNorm = 71.9121, GNorm = 0.3811, lr_0 = 1.0291e-04
Loss = 3.1996e-02, PNorm = 71.9129, GNorm = 0.4209, lr_0 = 1.0284e-04
Loss = 2.8212e-02, PNorm = 71.9134, GNorm = 0.3401, lr_0 = 1.0277e-04
Loss = 2.8408e-02, PNorm = 71.9144, GNorm = 0.5318, lr_0 = 1.0270e-04
Loss = 2.9140e-02, PNorm = 71.9161, GNorm = 0.5005, lr_0 = 1.0263e-04
Loss = 3.3981e-02, PNorm = 71.9176, GNorm = 0.6876, lr_0 = 1.0256e-04
Loss = 2.7511e-02, PNorm = 71.9194, GNorm = 0.4531, lr_0 = 1.0249e-04
Loss = 3.0192e-02, PNorm = 71.9204, GNorm = 0.5464, lr_0 = 1.0242e-04
Loss = 3.0497e-02, PNorm = 71.9211, GNorm = 0.3622, lr_0 = 1.0235e-04
Loss = 3.2195e-02, PNorm = 71.9220, GNorm = 0.5005, lr_0 = 1.0228e-04
Loss = 2.6956e-02, PNorm = 71.9235, GNorm = 0.6776, lr_0 = 1.0221e-04
Loss = 3.5329e-02, PNorm = 71.9241, GNorm = 0.9544, lr_0 = 1.0214e-04
Loss = 2.7265e-02, PNorm = 71.9256, GNorm = 0.5065, lr_0 = 1.0207e-04
Loss = 3.2792e-02, PNorm = 71.9270, GNorm = 0.3867, lr_0 = 1.0200e-04
Loss = 3.1068e-02, PNorm = 71.9279, GNorm = 0.4598, lr_0 = 1.0193e-04
Loss = 2.7764e-02, PNorm = 71.9290, GNorm = 0.4059, lr_0 = 1.0186e-04
Loss = 3.3885e-02, PNorm = 71.9300, GNorm = 0.4577, lr_0 = 1.0179e-04
Loss = 2.5076e-02, PNorm = 71.9312, GNorm = 0.5165, lr_0 = 1.0172e-04
Loss = 3.2112e-02, PNorm = 71.9321, GNorm = 0.4100, lr_0 = 1.0165e-04
Loss = 2.9520e-02, PNorm = 71.9334, GNorm = 0.4789, lr_0 = 1.0158e-04
Loss = 3.0561e-02, PNorm = 71.9348, GNorm = 0.5055, lr_0 = 1.0151e-04
Loss = 3.7310e-02, PNorm = 71.9365, GNorm = 0.4490, lr_0 = 1.0144e-04
Loss = 3.0502e-02, PNorm = 71.9372, GNorm = 0.4730, lr_0 = 1.0137e-04
Loss = 3.4819e-02, PNorm = 71.9387, GNorm = 0.4821, lr_0 = 1.0130e-04
Loss = 3.6435e-02, PNorm = 71.9411, GNorm = 0.5005, lr_0 = 1.0123e-04
Loss = 3.3876e-02, PNorm = 71.9421, GNorm = 0.5071, lr_0 = 1.0116e-04
Loss = 4.0605e-02, PNorm = 71.9436, GNorm = 0.5145, lr_0 = 1.0110e-04
Loss = 2.8119e-02, PNorm = 71.9445, GNorm = 0.4346, lr_0 = 1.0103e-04
Loss = 3.2111e-02, PNorm = 71.9459, GNorm = 0.6526, lr_0 = 1.0096e-04
Loss = 2.8293e-02, PNorm = 71.9474, GNorm = 0.4061, lr_0 = 1.0089e-04
Loss = 3.1636e-02, PNorm = 71.9488, GNorm = 0.4795, lr_0 = 1.0082e-04
Loss = 2.7451e-02, PNorm = 71.9504, GNorm = 0.3740, lr_0 = 1.0075e-04
Loss = 2.6562e-02, PNorm = 71.9519, GNorm = 0.4480, lr_0 = 1.0068e-04
Loss = 3.0988e-02, PNorm = 71.9530, GNorm = 0.4914, lr_0 = 1.0061e-04
Loss = 3.2576e-02, PNorm = 71.9544, GNorm = 0.4412, lr_0 = 1.0054e-04
Loss = 2.7278e-02, PNorm = 71.9551, GNorm = 0.4562, lr_0 = 1.0047e-04
Loss = 2.7952e-02, PNorm = 71.9566, GNorm = 0.4334, lr_0 = 1.0041e-04
Loss = 3.3036e-02, PNorm = 71.9586, GNorm = 0.4596, lr_0 = 1.0034e-04
Loss = 3.4372e-02, PNorm = 71.9597, GNorm = 0.7987, lr_0 = 1.0027e-04
Loss = 3.2760e-02, PNorm = 71.9608, GNorm = 0.4530, lr_0 = 1.0020e-04
Loss = 2.9946e-02, PNorm = 71.9617, GNorm = 0.6741, lr_0 = 1.0013e-04
Loss = 2.7780e-02, PNorm = 71.9627, GNorm = 0.5141, lr_0 = 1.0006e-04
Loss = 3.0071e-02, PNorm = 71.9638, GNorm = 0.3436, lr_0 = 1.0000e-04
Validation mae = 0.393505
Model 0 best validation mae = 0.387045 on epoch 18
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.380300
Ensemble test mae = 0.380300
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 1.0149e+00, PNorm = 38.3707, GNorm = 1.4592, lr_0 = 1.0413e-04
Loss = 8.8192e-01, PNorm = 38.3736, GNorm = 4.8805, lr_0 = 1.0788e-04
Loss = 7.6331e-01, PNorm = 38.3768, GNorm = 9.3107, lr_0 = 1.1163e-04
Loss = 6.6939e-01, PNorm = 38.3794, GNorm = 10.7935, lr_0 = 1.1537e-04
Loss = 7.5075e-01, PNorm = 38.3821, GNorm = 3.1547, lr_0 = 1.1913e-04
Loss = 6.7902e-01, PNorm = 38.3861, GNorm = 4.3124, lr_0 = 1.2287e-04
Loss = 5.6562e-01, PNorm = 38.3912, GNorm = 1.2024, lr_0 = 1.2663e-04
Loss = 5.7323e-01, PNorm = 38.3970, GNorm = 2.9289, lr_0 = 1.3038e-04
Loss = 5.5306e-01, PNorm = 38.4034, GNorm = 6.0404, lr_0 = 1.3413e-04
Loss = 5.2285e-01, PNorm = 38.4095, GNorm = 1.4904, lr_0 = 1.3788e-04
Loss = 5.2406e-01, PNorm = 38.4155, GNorm = 6.9758, lr_0 = 1.4163e-04
Loss = 5.4013e-01, PNorm = 38.4218, GNorm = 3.8640, lr_0 = 1.4537e-04
Loss = 5.1503e-01, PNorm = 38.4281, GNorm = 15.7637, lr_0 = 1.4913e-04
Loss = 4.9143e-01, PNorm = 38.4340, GNorm = 3.0119, lr_0 = 1.5288e-04
Loss = 4.7428e-01, PNorm = 38.4407, GNorm = 11.9547, lr_0 = 1.5662e-04
Loss = 4.6112e-01, PNorm = 38.4467, GNorm = 11.2914, lr_0 = 1.6038e-04
Loss = 4.1656e-01, PNorm = 38.4531, GNorm = 9.6733, lr_0 = 1.6412e-04
Loss = 4.2842e-01, PNorm = 38.4605, GNorm = 15.4243, lr_0 = 1.6788e-04
Loss = 4.7175e-01, PNorm = 38.4643, GNorm = 24.5764, lr_0 = 1.7163e-04
Loss = 4.3056e-01, PNorm = 38.4688, GNorm = 5.2828, lr_0 = 1.7538e-04
Loss = 4.0684e-01, PNorm = 38.4737, GNorm = 3.6554, lr_0 = 1.7913e-04
Loss = 4.3042e-01, PNorm = 38.4776, GNorm = 6.4059, lr_0 = 1.8288e-04
Loss = 4.0220e-01, PNorm = 38.4828, GNorm = 11.8160, lr_0 = 1.8662e-04
Loss = 3.6263e-01, PNorm = 38.4887, GNorm = 3.6186, lr_0 = 1.9038e-04
Loss = 3.9766e-01, PNorm = 38.4939, GNorm = 12.2718, lr_0 = 1.9413e-04
Loss = 3.7043e-01, PNorm = 38.4983, GNorm = 33.3323, lr_0 = 1.9788e-04
Loss = 3.7210e-01, PNorm = 38.5021, GNorm = 1.4963, lr_0 = 2.0163e-04
Loss = 3.4517e-01, PNorm = 38.5075, GNorm = 4.9092, lr_0 = 2.0537e-04
Loss = 3.3552e-01, PNorm = 38.5139, GNorm = 1.9063, lr_0 = 2.0913e-04
Loss = 3.5859e-01, PNorm = 38.5191, GNorm = 28.8678, lr_0 = 2.1288e-04
Loss = 3.9600e-01, PNorm = 38.5236, GNorm = 5.1924, lr_0 = 2.1663e-04
Loss = 4.1415e-01, PNorm = 38.5290, GNorm = 22.8640, lr_0 = 2.2038e-04
Loss = 4.4181e-01, PNorm = 38.5341, GNorm = 16.0335, lr_0 = 2.2412e-04
Loss = 3.5009e-01, PNorm = 38.5400, GNorm = 3.3431, lr_0 = 2.2787e-04
Loss = 3.4987e-01, PNorm = 38.5463, GNorm = 15.2503, lr_0 = 2.3163e-04
Loss = 3.4275e-01, PNorm = 38.5510, GNorm = 8.5217, lr_0 = 2.3538e-04
Loss = 3.7286e-01, PNorm = 38.5543, GNorm = 5.4651, lr_0 = 2.3913e-04
Loss = 3.4466e-01, PNorm = 38.5601, GNorm = 13.3925, lr_0 = 2.4288e-04
Loss = 3.2671e-01, PNorm = 38.5645, GNorm = 2.6034, lr_0 = 2.4662e-04
Loss = 4.0583e-01, PNorm = 38.5687, GNorm = 15.4112, lr_0 = 2.5038e-04
Loss = 3.1425e-01, PNorm = 38.5733, GNorm = 9.4765, lr_0 = 2.5413e-04
Loss = 3.2676e-01, PNorm = 38.5793, GNorm = 4.1629, lr_0 = 2.5788e-04
Loss = 3.3035e-01, PNorm = 38.5860, GNorm = 18.1019, lr_0 = 2.6163e-04
Loss = 3.6867e-01, PNorm = 38.5922, GNorm = 12.5427, lr_0 = 2.6537e-04
Loss = 4.5800e-01, PNorm = 38.5960, GNorm = 16.3319, lr_0 = 2.6912e-04
Loss = 4.3345e-01, PNorm = 38.6022, GNorm = 6.9376, lr_0 = 2.7288e-04
Loss = 3.9797e-01, PNorm = 38.6089, GNorm = 9.3216, lr_0 = 2.7663e-04
Loss = 5.0379e-01, PNorm = 38.6154, GNorm = 14.0541, lr_0 = 2.8038e-04
Loss = 3.8340e-01, PNorm = 38.6245, GNorm = 3.4417, lr_0 = 2.8413e-04
Loss = 3.3311e-01, PNorm = 38.6329, GNorm = 9.9862, lr_0 = 2.8787e-04
Loss = 3.9874e-01, PNorm = 38.6388, GNorm = 17.4770, lr_0 = 2.9163e-04
Loss = 3.3371e-01, PNorm = 38.6455, GNorm = 10.1106, lr_0 = 2.9538e-04
Loss = 3.9211e-01, PNorm = 38.6512, GNorm = 0.7430, lr_0 = 2.9913e-04
Loss = 3.6710e-01, PNorm = 38.6585, GNorm = 6.4481, lr_0 = 3.0288e-04
Loss = 3.5473e-01, PNorm = 38.6683, GNorm = 1.1745, lr_0 = 3.0662e-04
Loss = 3.4776e-01, PNorm = 38.6739, GNorm = 8.5468, lr_0 = 3.1037e-04
Loss = 3.0261e-01, PNorm = 38.6813, GNorm = 0.9637, lr_0 = 3.1413e-04
Loss = 3.4764e-01, PNorm = 38.6898, GNorm = 10.5861, lr_0 = 3.1788e-04
Loss = 3.1291e-01, PNorm = 38.6943, GNorm = 1.5454, lr_0 = 3.2163e-04
Loss = 3.2629e-01, PNorm = 38.6976, GNorm = 24.1434, lr_0 = 3.2538e-04
Loss = 3.2587e-01, PNorm = 38.7005, GNorm = 2.0042, lr_0 = 3.2912e-04
Loss = 3.0288e-01, PNorm = 38.7057, GNorm = 3.5485, lr_0 = 3.3288e-04
Loss = 3.6924e-01, PNorm = 38.7134, GNorm = 10.3989, lr_0 = 3.3663e-04
Loss = 3.2413e-01, PNorm = 38.7195, GNorm = 8.8295, lr_0 = 3.4038e-04
Loss = 3.2179e-01, PNorm = 38.7249, GNorm = 15.2253, lr_0 = 3.4413e-04
Loss = 3.5199e-01, PNorm = 38.7331, GNorm = 7.6117, lr_0 = 3.4787e-04
Loss = 3.6157e-01, PNorm = 38.7409, GNorm = 6.5602, lr_0 = 3.5162e-04
Loss = 3.1720e-01, PNorm = 38.7478, GNorm = 5.0534, lr_0 = 3.5538e-04
Loss = 3.2194e-01, PNorm = 38.7583, GNorm = 1.8361, lr_0 = 3.5913e-04
Loss = 2.7317e-01, PNorm = 38.7659, GNorm = 1.6453, lr_0 = 3.6288e-04
Loss = 2.4951e-01, PNorm = 38.7722, GNorm = 7.0253, lr_0 = 3.6662e-04
Loss = 2.9322e-01, PNorm = 38.7799, GNorm = 7.2492, lr_0 = 3.7037e-04
Loss = 2.9951e-01, PNorm = 38.7835, GNorm = 9.5621, lr_0 = 3.7413e-04
Loss = 3.6334e-01, PNorm = 38.7889, GNorm = 3.8171, lr_0 = 3.7788e-04
Loss = 3.6750e-01, PNorm = 38.7952, GNorm = 11.4178, lr_0 = 3.8163e-04
Loss = 3.7216e-01, PNorm = 38.8014, GNorm = 1.4022, lr_0 = 3.8537e-04
Loss = 3.9063e-01, PNorm = 38.8124, GNorm = 13.4785, lr_0 = 3.8912e-04
Loss = 3.5596e-01, PNorm = 38.8249, GNorm = 3.2899, lr_0 = 3.9287e-04
Loss = 2.9197e-01, PNorm = 38.8363, GNorm = 9.1759, lr_0 = 3.9663e-04
Loss = 3.1103e-01, PNorm = 38.8466, GNorm = 7.3737, lr_0 = 4.0038e-04
Loss = 3.2669e-01, PNorm = 38.8542, GNorm = 2.0675, lr_0 = 4.0413e-04
Loss = 3.2831e-01, PNorm = 38.8623, GNorm = 2.9055, lr_0 = 4.0787e-04
Loss = 2.8337e-01, PNorm = 38.8725, GNorm = 4.7457, lr_0 = 4.1162e-04
Loss = 2.5856e-01, PNorm = 38.8796, GNorm = 10.0394, lr_0 = 4.1537e-04
Loss = 2.8744e-01, PNorm = 38.8847, GNorm = 8.5148, lr_0 = 4.1913e-04
Loss = 2.9596e-01, PNorm = 38.8920, GNorm = 4.2701, lr_0 = 4.2288e-04
Loss = 2.7965e-01, PNorm = 38.9007, GNorm = 1.9156, lr_0 = 4.2662e-04
Loss = 3.2650e-01, PNorm = 38.9110, GNorm = 2.0637, lr_0 = 4.3037e-04
Loss = 2.9398e-01, PNorm = 38.9209, GNorm = 12.2257, lr_0 = 4.3412e-04
Loss = 3.4074e-01, PNorm = 38.9310, GNorm = 15.5419, lr_0 = 4.3788e-04
Loss = 3.3066e-01, PNorm = 38.9374, GNorm = 1.7045, lr_0 = 4.4163e-04
Loss = 2.9682e-01, PNorm = 38.9492, GNorm = 1.2138, lr_0 = 4.4538e-04
Loss = 3.1022e-01, PNorm = 38.9623, GNorm = 1.4057, lr_0 = 4.4912e-04
Loss = 2.7457e-01, PNorm = 38.9694, GNorm = 1.6924, lr_0 = 4.5287e-04
Loss = 2.6815e-01, PNorm = 38.9791, GNorm = 6.9010, lr_0 = 4.5662e-04
Loss = 2.5934e-01, PNorm = 38.9862, GNorm = 1.4167, lr_0 = 4.6038e-04
Loss = 3.0658e-01, PNorm = 38.9927, GNorm = 3.4436, lr_0 = 4.6413e-04
Loss = 3.2299e-01, PNorm = 39.0050, GNorm = 3.6895, lr_0 = 4.6787e-04
Loss = 3.4946e-01, PNorm = 39.0190, GNorm = 10.9167, lr_0 = 4.7162e-04
Loss = 3.0652e-01, PNorm = 39.0334, GNorm = 5.8620, lr_0 = 4.7537e-04
Loss = 2.9569e-01, PNorm = 39.0435, GNorm = 2.8878, lr_0 = 4.7913e-04
Loss = 3.2111e-01, PNorm = 39.0534, GNorm = 8.5960, lr_0 = 4.8288e-04
Loss = 2.8477e-01, PNorm = 39.0636, GNorm = 3.9760, lr_0 = 4.8663e-04
Loss = 3.2074e-01, PNorm = 39.0771, GNorm = 3.4330, lr_0 = 4.9038e-04
Loss = 2.8666e-01, PNorm = 39.0903, GNorm = 5.7473, lr_0 = 4.9412e-04
Loss = 2.8271e-01, PNorm = 39.0957, GNorm = 9.6431, lr_0 = 4.9788e-04
Loss = 3.0385e-01, PNorm = 39.1029, GNorm = 7.4302, lr_0 = 5.0163e-04
Loss = 2.6072e-01, PNorm = 39.1164, GNorm = 2.5772, lr_0 = 5.0538e-04
Loss = 2.6817e-01, PNorm = 39.1333, GNorm = 9.3336, lr_0 = 5.0913e-04
Loss = 3.2807e-01, PNorm = 39.1402, GNorm = 6.7481, lr_0 = 5.1287e-04
Loss = 2.7856e-01, PNorm = 39.1528, GNorm = 10.2497, lr_0 = 5.1663e-04
Loss = 2.9163e-01, PNorm = 39.1643, GNorm = 6.1986, lr_0 = 5.2038e-04
Loss = 3.1150e-01, PNorm = 39.1759, GNorm = 1.8209, lr_0 = 5.2413e-04
Loss = 2.8543e-01, PNorm = 39.1865, GNorm = 8.7411, lr_0 = 5.2788e-04
Loss = 2.5603e-01, PNorm = 39.2024, GNorm = 1.7400, lr_0 = 5.3162e-04
Loss = 2.7947e-01, PNorm = 39.2191, GNorm = 1.9367, lr_0 = 5.3538e-04
Loss = 2.3628e-01, PNorm = 39.2269, GNorm = 1.5914, lr_0 = 5.3912e-04
Loss = 2.6124e-01, PNorm = 39.2360, GNorm = 7.6272, lr_0 = 5.4288e-04
Loss = 3.3997e-01, PNorm = 39.2427, GNorm = 3.9656, lr_0 = 5.4663e-04
Loss = 2.9630e-01, PNorm = 39.2571, GNorm = 4.5924, lr_0 = 5.5038e-04
Validation mae = 0.598560
Epoch 1
Loss = 2.7992e-01, PNorm = 39.2762, GNorm = 1.4093, lr_0 = 5.5413e-04
Loss = 2.8965e-01, PNorm = 39.2920, GNorm = 6.7782, lr_0 = 5.5787e-04
Loss = 3.0514e-01, PNorm = 39.3040, GNorm = 10.4095, lr_0 = 5.6163e-04
Loss = 2.7147e-01, PNorm = 39.3158, GNorm = 1.3802, lr_0 = 5.6538e-04
Loss = 2.8536e-01, PNorm = 39.3291, GNorm = 1.8938, lr_0 = 5.6913e-04
Loss = 2.5028e-01, PNorm = 39.3407, GNorm = 1.4104, lr_0 = 5.7288e-04
Loss = 2.2911e-01, PNorm = 39.3506, GNorm = 7.3504, lr_0 = 5.7662e-04
Loss = 2.3561e-01, PNorm = 39.3679, GNorm = 9.6084, lr_0 = 5.8038e-04
Loss = 2.6431e-01, PNorm = 39.3795, GNorm = 7.5993, lr_0 = 5.8413e-04
Loss = 2.2075e-01, PNorm = 39.3904, GNorm = 10.8210, lr_0 = 5.8788e-04
Loss = 2.6302e-01, PNorm = 39.4031, GNorm = 5.1883, lr_0 = 5.9163e-04
Loss = 2.2641e-01, PNorm = 39.4155, GNorm = 6.7882, lr_0 = 5.9538e-04
Loss = 2.7413e-01, PNorm = 39.4288, GNorm = 9.6325, lr_0 = 5.9913e-04
Loss = 2.8514e-01, PNorm = 39.4410, GNorm = 9.6276, lr_0 = 6.0288e-04
Loss = 2.9428e-01, PNorm = 39.4595, GNorm = 4.7219, lr_0 = 6.0663e-04
Loss = 2.6642e-01, PNorm = 39.4806, GNorm = 1.7949, lr_0 = 6.1038e-04
Loss = 2.6688e-01, PNorm = 39.4937, GNorm = 1.8882, lr_0 = 6.1413e-04
Loss = 2.5224e-01, PNorm = 39.5027, GNorm = 5.9527, lr_0 = 6.1788e-04
Loss = 2.4101e-01, PNorm = 39.5167, GNorm = 4.2315, lr_0 = 6.2163e-04
Loss = 2.3677e-01, PNorm = 39.5340, GNorm = 7.1066, lr_0 = 6.2538e-04
Loss = 2.6741e-01, PNorm = 39.5542, GNorm = 2.0634, lr_0 = 6.2913e-04
Loss = 2.5386e-01, PNorm = 39.5700, GNorm = 3.0528, lr_0 = 6.3288e-04
Loss = 2.5229e-01, PNorm = 39.5854, GNorm = 11.4631, lr_0 = 6.3663e-04
Loss = 2.3886e-01, PNorm = 39.6041, GNorm = 5.8003, lr_0 = 6.4038e-04
Loss = 2.6073e-01, PNorm = 39.6215, GNorm = 10.1238, lr_0 = 6.4413e-04
Loss = 2.5856e-01, PNorm = 39.6400, GNorm = 1.7606, lr_0 = 6.4788e-04
Loss = 2.2283e-01, PNorm = 39.6565, GNorm = 7.0888, lr_0 = 6.5163e-04
Loss = 2.6352e-01, PNorm = 39.6744, GNorm = 2.7604, lr_0 = 6.5538e-04
Loss = 2.5582e-01, PNorm = 39.6924, GNorm = 1.9467, lr_0 = 6.5913e-04
Loss = 2.1685e-01, PNorm = 39.7054, GNorm = 1.3897, lr_0 = 6.6288e-04
Loss = 2.5069e-01, PNorm = 39.7125, GNorm = 2.9437, lr_0 = 6.6663e-04
Loss = 2.8132e-01, PNorm = 39.7245, GNorm = 12.4675, lr_0 = 6.7038e-04
Loss = 2.4357e-01, PNorm = 39.7350, GNorm = 5.2573, lr_0 = 6.7413e-04
Loss = 2.5565e-01, PNorm = 39.7566, GNorm = 5.7195, lr_0 = 6.7788e-04
Loss = 2.7831e-01, PNorm = 39.7739, GNorm = 1.8129, lr_0 = 6.8163e-04
Loss = 2.4478e-01, PNorm = 39.7875, GNorm = 6.1755, lr_0 = 6.8538e-04
Loss = 3.1011e-01, PNorm = 39.8097, GNorm = 9.5388, lr_0 = 6.8913e-04
Loss = 2.7191e-01, PNorm = 39.8333, GNorm = 11.7839, lr_0 = 6.9288e-04
Loss = 2.4447e-01, PNorm = 39.8585, GNorm = 2.2303, lr_0 = 6.9663e-04
Loss = 2.5527e-01, PNorm = 39.8743, GNorm = 2.8539, lr_0 = 7.0038e-04
Loss = 2.5440e-01, PNorm = 39.8850, GNorm = 6.5275, lr_0 = 7.0413e-04
Loss = 2.3234e-01, PNorm = 39.9021, GNorm = 4.2658, lr_0 = 7.0788e-04
Loss = 2.1703e-01, PNorm = 39.9210, GNorm = 3.1221, lr_0 = 7.1163e-04
Loss = 2.2786e-01, PNorm = 39.9353, GNorm = 2.3714, lr_0 = 7.1538e-04
Loss = 2.3638e-01, PNorm = 39.9469, GNorm = 3.5644, lr_0 = 7.1913e-04
Loss = 2.5711e-01, PNorm = 39.9581, GNorm = 4.6805, lr_0 = 7.2288e-04
Loss = 2.4247e-01, PNorm = 39.9734, GNorm = 12.2444, lr_0 = 7.2663e-04
Loss = 3.1557e-01, PNorm = 39.9902, GNorm = 5.7740, lr_0 = 7.3038e-04
Loss = 2.5387e-01, PNorm = 40.0132, GNorm = 1.6879, lr_0 = 7.3413e-04
Loss = 2.6554e-01, PNorm = 40.0320, GNorm = 7.8598, lr_0 = 7.3788e-04
Loss = 2.4879e-01, PNorm = 40.0578, GNorm = 3.8817, lr_0 = 7.4163e-04
Loss = 2.6475e-01, PNorm = 40.0749, GNorm = 2.5488, lr_0 = 7.4538e-04
Loss = 3.1859e-01, PNorm = 40.0957, GNorm = 5.1045, lr_0 = 7.4913e-04
Loss = 2.9184e-01, PNorm = 40.1198, GNorm = 2.4262, lr_0 = 7.5288e-04
Loss = 3.0819e-01, PNorm = 40.1464, GNorm = 4.3321, lr_0 = 7.5663e-04
Loss = 2.9119e-01, PNorm = 40.1763, GNorm = 1.7004, lr_0 = 7.6038e-04
Loss = 2.3433e-01, PNorm = 40.2004, GNorm = 1.6102, lr_0 = 7.6413e-04
Loss = 2.1071e-01, PNorm = 40.2222, GNorm = 1.9009, lr_0 = 7.6788e-04
Loss = 2.2667e-01, PNorm = 40.2418, GNorm = 3.0129, lr_0 = 7.7163e-04
Loss = 2.2866e-01, PNorm = 40.2523, GNorm = 4.8506, lr_0 = 7.7538e-04
Loss = 2.7157e-01, PNorm = 40.2664, GNorm = 3.7691, lr_0 = 7.7913e-04
Loss = 2.2524e-01, PNorm = 40.2945, GNorm = 4.4849, lr_0 = 7.8288e-04
Loss = 2.6456e-01, PNorm = 40.3152, GNorm = 4.9596, lr_0 = 7.8663e-04
Loss = 2.7619e-01, PNorm = 40.3341, GNorm = 3.7459, lr_0 = 7.9038e-04
Loss = 2.3119e-01, PNorm = 40.3496, GNorm = 1.7874, lr_0 = 7.9413e-04
Loss = 2.2424e-01, PNorm = 40.3733, GNorm = 0.7400, lr_0 = 7.9788e-04
Loss = 2.2446e-01, PNorm = 40.3918, GNorm = 1.9314, lr_0 = 8.0163e-04
Loss = 2.4564e-01, PNorm = 40.4054, GNorm = 7.2177, lr_0 = 8.0538e-04
Loss = 2.9072e-01, PNorm = 40.4252, GNorm = 6.0341, lr_0 = 8.0913e-04
Loss = 2.2338e-01, PNorm = 40.4490, GNorm = 0.9887, lr_0 = 8.1288e-04
Loss = 2.5648e-01, PNorm = 40.4830, GNorm = 1.5345, lr_0 = 8.1663e-04
Loss = 2.6376e-01, PNorm = 40.5099, GNorm = 1.5359, lr_0 = 8.2038e-04
Loss = 2.3900e-01, PNorm = 40.5269, GNorm = 1.8407, lr_0 = 8.2413e-04
Loss = 2.3588e-01, PNorm = 40.5430, GNorm = 1.6704, lr_0 = 8.2788e-04
Loss = 2.6164e-01, PNorm = 40.5559, GNorm = 3.8109, lr_0 = 8.3163e-04
Loss = 2.5248e-01, PNorm = 40.5750, GNorm = 3.3410, lr_0 = 8.3538e-04
Loss = 2.8277e-01, PNorm = 40.5926, GNorm = 1.1596, lr_0 = 8.3913e-04
Loss = 2.3444e-01, PNorm = 40.6229, GNorm = 1.1328, lr_0 = 8.4288e-04
Loss = 2.0681e-01, PNorm = 40.6441, GNorm = 1.8296, lr_0 = 8.4663e-04
Loss = 2.1992e-01, PNorm = 40.6649, GNorm = 4.7231, lr_0 = 8.5038e-04
Loss = 2.1805e-01, PNorm = 40.6819, GNorm = 2.0865, lr_0 = 8.5413e-04
Loss = 1.8491e-01, PNorm = 40.6952, GNorm = 1.2585, lr_0 = 8.5788e-04
Loss = 2.2011e-01, PNorm = 40.7113, GNorm = 5.7286, lr_0 = 8.6163e-04
Loss = 2.9495e-01, PNorm = 40.7391, GNorm = 2.4618, lr_0 = 8.6538e-04
Loss = 2.4744e-01, PNorm = 40.7651, GNorm = 6.3239, lr_0 = 8.6913e-04
Loss = 2.3893e-01, PNorm = 40.7889, GNorm = 1.9219, lr_0 = 8.7288e-04
Loss = 1.9113e-01, PNorm = 40.8091, GNorm = 1.2830, lr_0 = 8.7663e-04
Loss = 2.0422e-01, PNorm = 40.8237, GNorm = 4.7378, lr_0 = 8.8038e-04
Loss = 2.1633e-01, PNorm = 40.8427, GNorm = 4.5481, lr_0 = 8.8413e-04
Loss = 2.1334e-01, PNorm = 40.8603, GNorm = 3.9760, lr_0 = 8.8788e-04
Loss = 2.1271e-01, PNorm = 40.8792, GNorm = 3.1068, lr_0 = 8.9163e-04
Loss = 2.1672e-01, PNorm = 40.9035, GNorm = 5.2570, lr_0 = 8.9538e-04
Loss = 2.3876e-01, PNorm = 40.9222, GNorm = 1.2153, lr_0 = 8.9913e-04
Loss = 2.4517e-01, PNorm = 40.9491, GNorm = 3.6629, lr_0 = 9.0288e-04
Loss = 2.1661e-01, PNorm = 40.9640, GNorm = 4.9309, lr_0 = 9.0663e-04
Loss = 2.5539e-01, PNorm = 40.9883, GNorm = 8.5655, lr_0 = 9.1038e-04
Loss = 2.3542e-01, PNorm = 41.0102, GNorm = 1.7872, lr_0 = 9.1413e-04
Loss = 2.4938e-01, PNorm = 41.0370, GNorm = 1.4700, lr_0 = 9.1788e-04
Loss = 2.5030e-01, PNorm = 41.0660, GNorm = 6.4999, lr_0 = 9.2163e-04
Loss = 2.3675e-01, PNorm = 41.0849, GNorm = 4.0282, lr_0 = 9.2538e-04
Loss = 2.3763e-01, PNorm = 41.1201, GNorm = 1.1639, lr_0 = 9.2913e-04
Loss = 2.5546e-01, PNorm = 41.1445, GNorm = 4.6677, lr_0 = 9.3288e-04
Loss = 2.5872e-01, PNorm = 41.1675, GNorm = 3.8675, lr_0 = 9.3663e-04
Loss = 2.2631e-01, PNorm = 41.1894, GNorm = 4.9884, lr_0 = 9.4038e-04
Loss = 2.1347e-01, PNorm = 41.2204, GNorm = 1.1400, lr_0 = 9.4413e-04
Loss = 2.1886e-01, PNorm = 41.2443, GNorm = 1.3647, lr_0 = 9.4788e-04
Loss = 2.1132e-01, PNorm = 41.2647, GNorm = 3.0191, lr_0 = 9.5163e-04
Loss = 2.5119e-01, PNorm = 41.2833, GNorm = 2.9714, lr_0 = 9.5538e-04
Loss = 1.9426e-01, PNorm = 41.2966, GNorm = 2.6497, lr_0 = 9.5913e-04
Loss = 1.9786e-01, PNorm = 41.3212, GNorm = 4.6613, lr_0 = 9.6288e-04
Loss = 2.2782e-01, PNorm = 41.3439, GNorm = 2.9252, lr_0 = 9.6663e-04
Loss = 2.1502e-01, PNorm = 41.3698, GNorm = 6.0185, lr_0 = 9.7038e-04
Loss = 3.0760e-01, PNorm = 41.4102, GNorm = 2.8695, lr_0 = 9.7413e-04
Loss = 3.1359e-01, PNorm = 41.4589, GNorm = 5.0070, lr_0 = 9.7788e-04
Loss = 2.4147e-01, PNorm = 41.5073, GNorm = 1.0918, lr_0 = 9.8163e-04
Loss = 2.2663e-01, PNorm = 41.5422, GNorm = 2.2301, lr_0 = 9.8537e-04
Loss = 2.0041e-01, PNorm = 41.5670, GNorm = 1.4911, lr_0 = 9.8912e-04
Loss = 2.7896e-01, PNorm = 41.5864, GNorm = 7.0741, lr_0 = 9.9288e-04
Loss = 2.1890e-01, PNorm = 41.6156, GNorm = 0.8437, lr_0 = 9.9663e-04
Loss = 2.3142e-01, PNorm = 41.6485, GNorm = 1.4374, lr_0 = 9.9993e-04
Validation mae = 0.535704
Epoch 2
Loss = 2.2090e-01, PNorm = 41.6778, GNorm = 0.7200, lr_0 = 9.9925e-04
Loss = 1.9343e-01, PNorm = 41.7068, GNorm = 0.9184, lr_0 = 9.9856e-04
Loss = 2.1147e-01, PNorm = 41.7332, GNorm = 1.2308, lr_0 = 9.9788e-04
Loss = 2.0769e-01, PNorm = 41.7593, GNorm = 1.2811, lr_0 = 9.9719e-04
Loss = 1.8295e-01, PNorm = 41.7888, GNorm = 4.0780, lr_0 = 9.9651e-04
Loss = 2.0636e-01, PNorm = 41.8070, GNorm = 0.7038, lr_0 = 9.9583e-04
Loss = 1.8270e-01, PNorm = 41.8311, GNorm = 1.1067, lr_0 = 9.9515e-04
Loss = 2.1391e-01, PNorm = 41.8674, GNorm = 4.4420, lr_0 = 9.9446e-04
Loss = 2.2633e-01, PNorm = 41.8913, GNorm = 2.4968, lr_0 = 9.9378e-04
Loss = 2.0248e-01, PNorm = 41.9187, GNorm = 1.6523, lr_0 = 9.9310e-04
Loss = 1.9295e-01, PNorm = 41.9406, GNorm = 2.6187, lr_0 = 9.9242e-04
Loss = 2.4690e-01, PNorm = 41.9596, GNorm = 1.1448, lr_0 = 9.9174e-04
Loss = 2.3986e-01, PNorm = 41.9884, GNorm = 2.1282, lr_0 = 9.9106e-04
Loss = 2.0507e-01, PNorm = 42.0157, GNorm = 5.1400, lr_0 = 9.9038e-04
Loss = 2.2747e-01, PNorm = 42.0353, GNorm = 5.1124, lr_0 = 9.8971e-04
Loss = 2.1500e-01, PNorm = 42.0627, GNorm = 3.7686, lr_0 = 9.8903e-04
Loss = 2.0138e-01, PNorm = 42.0906, GNorm = 1.7040, lr_0 = 9.8835e-04
Loss = 1.8203e-01, PNorm = 42.1145, GNorm = 2.1665, lr_0 = 9.8767e-04
Loss = 1.8333e-01, PNorm = 42.1370, GNorm = 3.4543, lr_0 = 9.8700e-04
Loss = 2.0015e-01, PNorm = 42.1541, GNorm = 0.9788, lr_0 = 9.8632e-04
Loss = 2.0905e-01, PNorm = 42.1745, GNorm = 3.6662, lr_0 = 9.8564e-04
Loss = 2.4193e-01, PNorm = 42.2010, GNorm = 2.7827, lr_0 = 9.8497e-04
Loss = 2.1723e-01, PNorm = 42.2270, GNorm = 2.4857, lr_0 = 9.8429e-04
Loss = 1.9235e-01, PNorm = 42.2538, GNorm = 1.0283, lr_0 = 9.8362e-04
Loss = 1.9339e-01, PNorm = 42.2779, GNorm = 4.6876, lr_0 = 9.8295e-04
Loss = 2.2487e-01, PNorm = 42.3060, GNorm = 3.2899, lr_0 = 9.8227e-04
Loss = 2.5847e-01, PNorm = 42.3367, GNorm = 5.0309, lr_0 = 9.8160e-04
Loss = 2.1714e-01, PNorm = 42.3737, GNorm = 4.2918, lr_0 = 9.8093e-04
Loss = 2.2001e-01, PNorm = 42.4015, GNorm = 2.4246, lr_0 = 9.8026e-04
Loss = 2.1237e-01, PNorm = 42.4321, GNorm = 0.8857, lr_0 = 9.7958e-04
Loss = 2.1261e-01, PNorm = 42.4508, GNorm = 1.7343, lr_0 = 9.7891e-04
Loss = 1.9474e-01, PNorm = 42.4697, GNorm = 2.5692, lr_0 = 9.7824e-04
Loss = 2.2162e-01, PNorm = 42.4938, GNorm = 1.0359, lr_0 = 9.7757e-04
Loss = 1.7710e-01, PNorm = 42.5261, GNorm = 4.0587, lr_0 = 9.7690e-04
Loss = 2.0047e-01, PNorm = 42.5594, GNorm = 1.5328, lr_0 = 9.7623e-04
Loss = 1.7457e-01, PNorm = 42.5851, GNorm = 3.3058, lr_0 = 9.7556e-04
Loss = 2.3727e-01, PNorm = 42.6127, GNorm = 8.1655, lr_0 = 9.7490e-04
Loss = 2.2995e-01, PNorm = 42.6494, GNorm = 1.8860, lr_0 = 9.7423e-04
Loss = 2.0748e-01, PNorm = 42.6826, GNorm = 5.0069, lr_0 = 9.7356e-04
Loss = 2.1816e-01, PNorm = 42.7097, GNorm = 1.9341, lr_0 = 9.7289e-04
Loss = 2.0241e-01, PNorm = 42.7425, GNorm = 1.1108, lr_0 = 9.7223e-04
Loss = 2.5781e-01, PNorm = 42.7685, GNorm = 6.0026, lr_0 = 9.7156e-04
Loss = 2.6621e-01, PNorm = 42.7928, GNorm = 5.9848, lr_0 = 9.7090e-04
Loss = 2.1731e-01, PNorm = 42.8223, GNorm = 2.0247, lr_0 = 9.7023e-04
Loss = 2.0929e-01, PNorm = 42.8532, GNorm = 0.9619, lr_0 = 9.6957e-04
Loss = 1.9010e-01, PNorm = 42.8783, GNorm = 2.7003, lr_0 = 9.6890e-04
Loss = 1.8263e-01, PNorm = 42.8928, GNorm = 0.9258, lr_0 = 9.6824e-04
Loss = 1.5767e-01, PNorm = 42.9108, GNorm = 1.0292, lr_0 = 9.6757e-04
Loss = 1.8285e-01, PNorm = 42.9301, GNorm = 2.2424, lr_0 = 9.6691e-04
Loss = 1.8205e-01, PNorm = 42.9453, GNorm = 0.8916, lr_0 = 9.6625e-04
Loss = 2.0356e-01, PNorm = 42.9683, GNorm = 2.5934, lr_0 = 9.6559e-04
Loss = 1.9847e-01, PNorm = 42.9822, GNorm = 1.2732, lr_0 = 9.6493e-04
Loss = 2.2794e-01, PNorm = 43.0058, GNorm = 3.4597, lr_0 = 9.6427e-04
Loss = 2.0596e-01, PNorm = 43.0387, GNorm = 3.3124, lr_0 = 9.6360e-04
Loss = 1.9010e-01, PNorm = 43.0655, GNorm = 1.2907, lr_0 = 9.6294e-04
Loss = 1.7520e-01, PNorm = 43.0840, GNorm = 2.1906, lr_0 = 9.6228e-04
Loss = 2.1380e-01, PNorm = 43.1019, GNorm = 3.8956, lr_0 = 9.6163e-04
Loss = 2.0865e-01, PNorm = 43.1326, GNorm = 1.3786, lr_0 = 9.6097e-04
Loss = 2.1668e-01, PNorm = 43.1601, GNorm = 2.3368, lr_0 = 9.6031e-04
Loss = 2.3233e-01, PNorm = 43.1823, GNorm = 0.8426, lr_0 = 9.5965e-04
Loss = 1.7564e-01, PNorm = 43.2033, GNorm = 1.8033, lr_0 = 9.5899e-04
Loss = 1.8922e-01, PNorm = 43.2283, GNorm = 1.1980, lr_0 = 9.5834e-04
Loss = 2.1736e-01, PNorm = 43.2386, GNorm = 1.4631, lr_0 = 9.5768e-04
Loss = 2.0235e-01, PNorm = 43.2549, GNorm = 1.3381, lr_0 = 9.5702e-04
Loss = 2.1454e-01, PNorm = 43.2757, GNorm = 5.3927, lr_0 = 9.5637e-04
Loss = 2.3979e-01, PNorm = 43.3064, GNorm = 5.3455, lr_0 = 9.5571e-04
Loss = 2.2591e-01, PNorm = 43.3400, GNorm = 1.5311, lr_0 = 9.5506e-04
Loss = 2.1091e-01, PNorm = 43.3731, GNorm = 5.0561, lr_0 = 9.5440e-04
Loss = 1.9499e-01, PNorm = 43.3988, GNorm = 5.4852, lr_0 = 9.5375e-04
Loss = 1.8957e-01, PNorm = 43.4250, GNorm = 0.8690, lr_0 = 9.5310e-04
Loss = 1.8116e-01, PNorm = 43.4512, GNorm = 1.0946, lr_0 = 9.5244e-04
Loss = 1.6422e-01, PNorm = 43.4773, GNorm = 1.4262, lr_0 = 9.5179e-04
Loss = 1.7014e-01, PNorm = 43.4901, GNorm = 1.7661, lr_0 = 9.5114e-04
Loss = 1.8510e-01, PNorm = 43.5104, GNorm = 0.8775, lr_0 = 9.5049e-04
Loss = 1.9605e-01, PNorm = 43.5315, GNorm = 2.0693, lr_0 = 9.4984e-04
Loss = 1.8300e-01, PNorm = 43.5453, GNorm = 1.4024, lr_0 = 9.4919e-04
Loss = 2.2497e-01, PNorm = 43.5699, GNorm = 1.3095, lr_0 = 9.4854e-04
Loss = 1.9151e-01, PNorm = 43.5929, GNorm = 1.0923, lr_0 = 9.4789e-04
Loss = 1.9536e-01, PNorm = 43.6101, GNorm = 2.3757, lr_0 = 9.4724e-04
Loss = 1.9522e-01, PNorm = 43.6348, GNorm = 0.5564, lr_0 = 9.4659e-04
Loss = 1.7988e-01, PNorm = 43.6632, GNorm = 1.2519, lr_0 = 9.4594e-04
Loss = 1.8626e-01, PNorm = 43.6922, GNorm = 0.7804, lr_0 = 9.4529e-04
Loss = 1.8803e-01, PNorm = 43.7150, GNorm = 2.2536, lr_0 = 9.4464e-04
Loss = 1.6514e-01, PNorm = 43.7383, GNorm = 0.9614, lr_0 = 9.4400e-04
Loss = 1.7771e-01, PNorm = 43.7529, GNorm = 1.5414, lr_0 = 9.4335e-04
Loss = 2.0965e-01, PNorm = 43.7715, GNorm = 2.7440, lr_0 = 9.4270e-04
Loss = 1.9997e-01, PNorm = 43.8037, GNorm = 2.7582, lr_0 = 9.4206e-04
Loss = 2.2348e-01, PNorm = 43.8272, GNorm = 2.6189, lr_0 = 9.4141e-04
Loss = 1.7676e-01, PNorm = 43.8557, GNorm = 1.9006, lr_0 = 9.4077e-04
Loss = 2.1108e-01, PNorm = 43.8841, GNorm = 4.3078, lr_0 = 9.4012e-04
Loss = 2.1278e-01, PNorm = 43.9185, GNorm = 4.3052, lr_0 = 9.3948e-04
Loss = 2.1325e-01, PNorm = 43.9475, GNorm = 4.7717, lr_0 = 9.3884e-04
Loss = 2.3101e-01, PNorm = 43.9713, GNorm = 2.4508, lr_0 = 9.3819e-04
Loss = 1.9659e-01, PNorm = 44.0040, GNorm = 1.4865, lr_0 = 9.3755e-04
Loss = 2.1928e-01, PNorm = 44.0311, GNorm = 1.6899, lr_0 = 9.3691e-04
Loss = 1.8173e-01, PNorm = 44.0537, GNorm = 1.3691, lr_0 = 9.3627e-04
Loss = 1.9418e-01, PNorm = 44.0702, GNorm = 1.3304, lr_0 = 9.3562e-04
Loss = 1.8070e-01, PNorm = 44.0855, GNorm = 1.5383, lr_0 = 9.3498e-04
Loss = 1.7876e-01, PNorm = 44.1036, GNorm = 4.2668, lr_0 = 9.3434e-04
Loss = 1.6976e-01, PNorm = 44.1196, GNorm = 3.5916, lr_0 = 9.3370e-04
Loss = 2.0539e-01, PNorm = 44.1375, GNorm = 2.3050, lr_0 = 9.3306e-04
Loss = 1.8926e-01, PNorm = 44.1525, GNorm = 3.2778, lr_0 = 9.3242e-04
Loss = 2.2032e-01, PNorm = 44.1720, GNorm = 4.6294, lr_0 = 9.3178e-04
Loss = 2.0314e-01, PNorm = 44.1933, GNorm = 1.7161, lr_0 = 9.3115e-04
Loss = 1.9081e-01, PNorm = 44.2171, GNorm = 1.6595, lr_0 = 9.3051e-04
Loss = 1.9676e-01, PNorm = 44.2368, GNorm = 1.6396, lr_0 = 9.2987e-04
Loss = 1.9591e-01, PNorm = 44.2560, GNorm = 1.4829, lr_0 = 9.2923e-04
Loss = 1.8675e-01, PNorm = 44.2789, GNorm = 3.1243, lr_0 = 9.2860e-04
Loss = 1.6135e-01, PNorm = 44.2991, GNorm = 1.5076, lr_0 = 9.2796e-04
Loss = 1.6036e-01, PNorm = 44.3198, GNorm = 0.9078, lr_0 = 9.2733e-04
Loss = 2.0084e-01, PNorm = 44.3403, GNorm = 0.8350, lr_0 = 9.2669e-04
Loss = 1.7391e-01, PNorm = 44.3613, GNorm = 3.1207, lr_0 = 9.2606e-04
Loss = 1.8425e-01, PNorm = 44.3832, GNorm = 2.5515, lr_0 = 9.2542e-04
Loss = 1.7275e-01, PNorm = 44.4032, GNorm = 3.2710, lr_0 = 9.2479e-04
Loss = 1.9585e-01, PNorm = 44.4219, GNorm = 0.8993, lr_0 = 9.2415e-04
Loss = 2.1024e-01, PNorm = 44.4550, GNorm = 1.8332, lr_0 = 9.2352e-04
Loss = 2.0163e-01, PNorm = 44.4833, GNorm = 3.1996, lr_0 = 9.2289e-04
Loss = 1.7370e-01, PNorm = 44.5094, GNorm = 1.0962, lr_0 = 9.2226e-04
Loss = 1.9197e-01, PNorm = 44.5322, GNorm = 3.1511, lr_0 = 9.2162e-04
Loss = 1.7415e-01, PNorm = 44.5562, GNorm = 2.3999, lr_0 = 9.2099e-04
Validation mae = 0.472069
Epoch 3
Loss = 2.0042e-01, PNorm = 44.5800, GNorm = 1.4525, lr_0 = 9.2036e-04
Loss = 1.8249e-01, PNorm = 44.6105, GNorm = 1.1737, lr_0 = 9.1973e-04
Loss = 1.7318e-01, PNorm = 44.6429, GNorm = 3.3385, lr_0 = 9.1910e-04
Loss = 1.6714e-01, PNorm = 44.6686, GNorm = 1.5330, lr_0 = 9.1847e-04
Loss = 1.7393e-01, PNorm = 44.6896, GNorm = 1.3109, lr_0 = 9.1784e-04
Loss = 1.7343e-01, PNorm = 44.7191, GNorm = 2.4142, lr_0 = 9.1721e-04
Loss = 1.7881e-01, PNorm = 44.7490, GNorm = 2.1237, lr_0 = 9.1658e-04
Loss = 2.0697e-01, PNorm = 44.7830, GNorm = 0.8063, lr_0 = 9.1596e-04
Loss = 1.8513e-01, PNorm = 44.8060, GNorm = 2.6933, lr_0 = 9.1533e-04
Loss = 1.6741e-01, PNorm = 44.8334, GNorm = 1.3502, lr_0 = 9.1470e-04
Loss = 1.5030e-01, PNorm = 44.8542, GNorm = 1.4270, lr_0 = 9.1408e-04
Loss = 1.9036e-01, PNorm = 44.8772, GNorm = 1.6551, lr_0 = 9.1345e-04
Loss = 1.4111e-01, PNorm = 44.8951, GNorm = 1.6570, lr_0 = 9.1282e-04
Loss = 1.7951e-01, PNorm = 44.9233, GNorm = 3.0220, lr_0 = 9.1220e-04
Loss = 2.0054e-01, PNorm = 44.9497, GNorm = 1.6960, lr_0 = 9.1157e-04
Loss = 1.8601e-01, PNorm = 44.9831, GNorm = 1.9953, lr_0 = 9.1095e-04
Loss = 1.7761e-01, PNorm = 45.0054, GNorm = 1.3368, lr_0 = 9.1032e-04
Loss = 1.7992e-01, PNorm = 45.0296, GNorm = 2.1919, lr_0 = 9.0970e-04
Loss = 1.7519e-01, PNorm = 45.0516, GNorm = 1.3542, lr_0 = 9.0908e-04
Loss = 1.8595e-01, PNorm = 45.0782, GNorm = 2.2850, lr_0 = 9.0846e-04
Loss = 2.0718e-01, PNorm = 45.1021, GNorm = 2.2085, lr_0 = 9.0783e-04
Loss = 1.9563e-01, PNorm = 45.1329, GNorm = 1.1142, lr_0 = 9.0721e-04
Loss = 1.7277e-01, PNorm = 45.1557, GNorm = 3.0799, lr_0 = 9.0659e-04
Loss = 1.7827e-01, PNorm = 45.1824, GNorm = 1.3428, lr_0 = 9.0597e-04
Loss = 1.8412e-01, PNorm = 45.2092, GNorm = 2.5537, lr_0 = 9.0535e-04
Loss = 1.9301e-01, PNorm = 45.2323, GNorm = 2.3847, lr_0 = 9.0473e-04
Loss = 1.9027e-01, PNorm = 45.2579, GNorm = 2.3317, lr_0 = 9.0411e-04
Loss = 1.6141e-01, PNorm = 45.2770, GNorm = 1.2153, lr_0 = 9.0349e-04
Loss = 1.5581e-01, PNorm = 45.3008, GNorm = 1.6845, lr_0 = 9.0287e-04
Loss = 1.6833e-01, PNorm = 45.3297, GNorm = 1.1541, lr_0 = 9.0225e-04
Loss = 1.7383e-01, PNorm = 45.3477, GNorm = 1.2021, lr_0 = 9.0163e-04
Loss = 2.3333e-01, PNorm = 45.3708, GNorm = 3.3559, lr_0 = 9.0102e-04
Loss = 1.7926e-01, PNorm = 45.4022, GNorm = 1.1975, lr_0 = 9.0040e-04
Loss = 2.1709e-01, PNorm = 45.4337, GNorm = 3.9859, lr_0 = 8.9978e-04
Loss = 1.5253e-01, PNorm = 45.4684, GNorm = 1.9051, lr_0 = 8.9916e-04
Loss = 1.8183e-01, PNorm = 45.4929, GNorm = 1.0952, lr_0 = 8.9855e-04
Loss = 1.7712e-01, PNorm = 45.5167, GNorm = 1.6320, lr_0 = 8.9793e-04
Loss = 1.7027e-01, PNorm = 45.5397, GNorm = 1.8961, lr_0 = 8.9732e-04
Loss = 1.8719e-01, PNorm = 45.5580, GNorm = 2.0126, lr_0 = 8.9670e-04
Loss = 1.8103e-01, PNorm = 45.5760, GNorm = 0.8996, lr_0 = 8.9609e-04
Loss = 1.6506e-01, PNorm = 45.5976, GNorm = 2.1856, lr_0 = 8.9548e-04
Loss = 1.7712e-01, PNorm = 45.6242, GNorm = 3.2203, lr_0 = 8.9486e-04
Loss = 1.6751e-01, PNorm = 45.6434, GNorm = 0.8255, lr_0 = 8.9425e-04
Loss = 1.7232e-01, PNorm = 45.6656, GNorm = 1.0529, lr_0 = 8.9364e-04
Loss = 1.7913e-01, PNorm = 45.6821, GNorm = 1.1499, lr_0 = 8.9302e-04
Loss = 1.5344e-01, PNorm = 45.7061, GNorm = 0.9992, lr_0 = 8.9241e-04
Loss = 1.8089e-01, PNorm = 45.7272, GNorm = 4.6661, lr_0 = 8.9180e-04
Loss = 1.8503e-01, PNorm = 45.7497, GNorm = 1.3478, lr_0 = 8.9119e-04
Loss = 1.7711e-01, PNorm = 45.7691, GNorm = 1.2284, lr_0 = 8.9058e-04
Loss = 1.8095e-01, PNorm = 45.7906, GNorm = 2.0086, lr_0 = 8.8997e-04
Loss = 1.6028e-01, PNorm = 45.8151, GNorm = 2.1610, lr_0 = 8.8936e-04
Loss = 1.6888e-01, PNorm = 45.8375, GNorm = 1.6293, lr_0 = 8.8875e-04
Loss = 1.7905e-01, PNorm = 45.8670, GNorm = 2.7736, lr_0 = 8.8814e-04
Loss = 2.0349e-01, PNorm = 45.8918, GNorm = 1.7064, lr_0 = 8.8753e-04
Loss = 1.9169e-01, PNorm = 45.9176, GNorm = 0.7735, lr_0 = 8.8693e-04
Loss = 1.9308e-01, PNorm = 45.9477, GNorm = 3.1346, lr_0 = 8.8632e-04
Loss = 1.9348e-01, PNorm = 45.9771, GNorm = 4.1519, lr_0 = 8.8571e-04
Loss = 2.3640e-01, PNorm = 46.0051, GNorm = 1.8211, lr_0 = 8.8510e-04
Loss = 2.2610e-01, PNorm = 46.0480, GNorm = 2.6935, lr_0 = 8.8450e-04
Loss = 1.6304e-01, PNorm = 46.0853, GNorm = 1.1818, lr_0 = 8.8389e-04
Loss = 1.7285e-01, PNorm = 46.1022, GNorm = 1.3666, lr_0 = 8.8329e-04
Loss = 1.7341e-01, PNorm = 46.1186, GNorm = 0.9891, lr_0 = 8.8268e-04
Loss = 1.5315e-01, PNorm = 46.1340, GNorm = 2.4806, lr_0 = 8.8208e-04
Loss = 1.6810e-01, PNorm = 46.1451, GNorm = 1.8170, lr_0 = 8.8147e-04
Loss = 1.6324e-01, PNorm = 46.1586, GNorm = 0.7720, lr_0 = 8.8087e-04
Loss = 1.8363e-01, PNorm = 46.1838, GNorm = 1.1272, lr_0 = 8.8026e-04
Loss = 1.8727e-01, PNorm = 46.2134, GNorm = 1.3636, lr_0 = 8.7966e-04
Loss = 1.5947e-01, PNorm = 46.2429, GNorm = 1.6607, lr_0 = 8.7906e-04
Loss = 1.6453e-01, PNorm = 46.2671, GNorm = 0.7222, lr_0 = 8.7846e-04
Loss = 1.4976e-01, PNorm = 46.2812, GNorm = 1.1151, lr_0 = 8.7785e-04
Loss = 2.0901e-01, PNorm = 46.2986, GNorm = 2.3473, lr_0 = 8.7725e-04
Loss = 1.9778e-01, PNorm = 46.3267, GNorm = 0.5903, lr_0 = 8.7665e-04
Loss = 1.6661e-01, PNorm = 46.3582, GNorm = 0.9951, lr_0 = 8.7605e-04
Loss = 1.6051e-01, PNorm = 46.3863, GNorm = 0.6635, lr_0 = 8.7545e-04
Loss = 1.7478e-01, PNorm = 46.4082, GNorm = 0.9889, lr_0 = 8.7485e-04
Loss = 1.6806e-01, PNorm = 46.4273, GNorm = 1.2857, lr_0 = 8.7425e-04
Loss = 1.6180e-01, PNorm = 46.4503, GNorm = 1.2113, lr_0 = 8.7365e-04
Loss = 1.7384e-01, PNorm = 46.4646, GNorm = 1.3451, lr_0 = 8.7306e-04
Loss = 1.5843e-01, PNorm = 46.4728, GNorm = 0.8081, lr_0 = 8.7246e-04
Loss = 1.6107e-01, PNorm = 46.4933, GNorm = 1.9010, lr_0 = 8.7186e-04
Loss = 1.7730e-01, PNorm = 46.5163, GNorm = 1.2742, lr_0 = 8.7126e-04
Loss = 1.6458e-01, PNorm = 46.5403, GNorm = 1.1591, lr_0 = 8.7067e-04
Loss = 1.5526e-01, PNorm = 46.5698, GNorm = 1.3008, lr_0 = 8.7007e-04
Loss = 1.5413e-01, PNorm = 46.5835, GNorm = 0.8080, lr_0 = 8.6947e-04
Loss = 1.7314e-01, PNorm = 46.5937, GNorm = 1.2372, lr_0 = 8.6888e-04
Loss = 1.5838e-01, PNorm = 46.6192, GNorm = 1.1548, lr_0 = 8.6828e-04
Loss = 1.5121e-01, PNorm = 46.6508, GNorm = 1.5682, lr_0 = 8.6769e-04
Loss = 1.6381e-01, PNorm = 46.6717, GNorm = 2.8965, lr_0 = 8.6709e-04
Loss = 1.7624e-01, PNorm = 46.6850, GNorm = 1.4188, lr_0 = 8.6650e-04
Loss = 1.5586e-01, PNorm = 46.7099, GNorm = 1.5935, lr_0 = 8.6590e-04
Loss = 1.6675e-01, PNorm = 46.7351, GNorm = 1.8452, lr_0 = 8.6531e-04
Loss = 1.5912e-01, PNorm = 46.7555, GNorm = 0.9069, lr_0 = 8.6472e-04
Loss = 1.7108e-01, PNorm = 46.7717, GNorm = 1.1305, lr_0 = 8.6413e-04
Loss = 1.8472e-01, PNorm = 46.7877, GNorm = 2.9730, lr_0 = 8.6353e-04
Loss = 1.5754e-01, PNorm = 46.8065, GNorm = 2.6037, lr_0 = 8.6294e-04
Loss = 1.4284e-01, PNorm = 46.8155, GNorm = 1.7873, lr_0 = 8.6235e-04
Loss = 1.5413e-01, PNorm = 46.8292, GNorm = 2.0712, lr_0 = 8.6176e-04
Loss = 1.8244e-01, PNorm = 46.8462, GNorm = 2.1448, lr_0 = 8.6117e-04
Loss = 1.9092e-01, PNorm = 46.8720, GNorm = 1.3310, lr_0 = 8.6058e-04
Loss = 1.7270e-01, PNorm = 46.9053, GNorm = 3.1729, lr_0 = 8.5999e-04
Loss = 2.0558e-01, PNorm = 46.9337, GNorm = 1.0755, lr_0 = 8.5940e-04
Loss = 1.9539e-01, PNorm = 46.9560, GNorm = 1.8033, lr_0 = 8.5881e-04
Loss = 1.9130e-01, PNorm = 46.9733, GNorm = 2.7426, lr_0 = 8.5823e-04
Loss = 1.7991e-01, PNorm = 46.9899, GNorm = 2.6794, lr_0 = 8.5764e-04
Loss = 1.7602e-01, PNorm = 47.0043, GNorm = 1.3421, lr_0 = 8.5705e-04
Loss = 2.1890e-01, PNorm = 47.0267, GNorm = 3.7370, lr_0 = 8.5646e-04
Loss = 1.7464e-01, PNorm = 47.0539, GNorm = 1.5627, lr_0 = 8.5588e-04
Loss = 1.4877e-01, PNorm = 47.0864, GNorm = 1.0631, lr_0 = 8.5529e-04
Loss = 1.8073e-01, PNorm = 47.1073, GNorm = 1.8528, lr_0 = 8.5470e-04
Loss = 1.6901e-01, PNorm = 47.1231, GNorm = 3.0242, lr_0 = 8.5412e-04
Loss = 1.4792e-01, PNorm = 47.1449, GNorm = 1.6409, lr_0 = 8.5353e-04
Loss = 1.6001e-01, PNorm = 47.1623, GNorm = 0.6290, lr_0 = 8.5295e-04
Loss = 1.5579e-01, PNorm = 47.1821, GNorm = 1.4575, lr_0 = 8.5236e-04
Loss = 1.5128e-01, PNorm = 47.1973, GNorm = 0.5863, lr_0 = 8.5178e-04
Loss = 1.5614e-01, PNorm = 47.2154, GNorm = 1.2256, lr_0 = 8.5120e-04
Loss = 1.3570e-01, PNorm = 47.2384, GNorm = 1.3785, lr_0 = 8.5061e-04
Loss = 1.5904e-01, PNorm = 47.2473, GNorm = 0.9598, lr_0 = 8.5003e-04
Loss = 1.4130e-01, PNorm = 47.2603, GNorm = 0.7948, lr_0 = 8.4945e-04
Loss = 1.4182e-01, PNorm = 47.2804, GNorm = 1.0206, lr_0 = 8.4887e-04
Loss = 1.6183e-01, PNorm = 47.2936, GNorm = 2.0327, lr_0 = 8.4828e-04
Validation mae = 0.459057
Epoch 4
Loss = 1.4456e-01, PNorm = 47.3112, GNorm = 1.3860, lr_0 = 8.4770e-04
Loss = 1.6059e-01, PNorm = 47.3257, GNorm = 1.0637, lr_0 = 8.4712e-04
Loss = 1.5110e-01, PNorm = 47.3477, GNorm = 1.3261, lr_0 = 8.4654e-04
Loss = 1.4747e-01, PNorm = 47.3766, GNorm = 1.7607, lr_0 = 8.4596e-04
Loss = 1.6559e-01, PNorm = 47.3937, GNorm = 4.4395, lr_0 = 8.4538e-04
Loss = 1.8753e-01, PNorm = 47.4129, GNorm = 0.5532, lr_0 = 8.4480e-04
Loss = 1.5018e-01, PNorm = 47.4460, GNorm = 0.8336, lr_0 = 8.4423e-04
Loss = 1.9525e-01, PNorm = 47.4753, GNorm = 1.9324, lr_0 = 8.4365e-04
Loss = 1.6237e-01, PNorm = 47.5068, GNorm = 0.7513, lr_0 = 8.4307e-04
Loss = 1.5625e-01, PNorm = 47.5456, GNorm = 1.1331, lr_0 = 8.4249e-04
Loss = 1.4925e-01, PNorm = 47.5711, GNorm = 2.1417, lr_0 = 8.4191e-04
Loss = 1.4893e-01, PNorm = 47.5880, GNorm = 0.7186, lr_0 = 8.4134e-04
Loss = 1.7155e-01, PNorm = 47.6047, GNorm = 1.4788, lr_0 = 8.4076e-04
Loss = 1.9168e-01, PNorm = 47.6231, GNorm = 2.2344, lr_0 = 8.4019e-04
Loss = 1.6320e-01, PNorm = 47.6460, GNorm = 2.3767, lr_0 = 8.3961e-04
Loss = 1.5199e-01, PNorm = 47.6706, GNorm = 1.3690, lr_0 = 8.3903e-04
Loss = 1.8254e-01, PNorm = 47.6855, GNorm = 3.1596, lr_0 = 8.3846e-04
Loss = 1.5626e-01, PNorm = 47.7161, GNorm = 0.7279, lr_0 = 8.3789e-04
Loss = 1.3716e-01, PNorm = 47.7444, GNorm = 1.2373, lr_0 = 8.3731e-04
Loss = 1.5042e-01, PNorm = 47.7630, GNorm = 2.1276, lr_0 = 8.3674e-04
Loss = 1.3735e-01, PNorm = 47.7847, GNorm = 0.6885, lr_0 = 8.3616e-04
Loss = 1.4689e-01, PNorm = 47.8026, GNorm = 0.8776, lr_0 = 8.3559e-04
Loss = 1.4894e-01, PNorm = 47.8163, GNorm = 0.9940, lr_0 = 8.3502e-04
Loss = 1.4568e-01, PNorm = 47.8402, GNorm = 0.8396, lr_0 = 8.3445e-04
Loss = 1.3595e-01, PNorm = 47.8585, GNorm = 2.2809, lr_0 = 8.3388e-04
Loss = 1.4913e-01, PNorm = 47.8772, GNorm = 2.3665, lr_0 = 8.3330e-04
Loss = 1.6295e-01, PNorm = 47.8981, GNorm = 0.9961, lr_0 = 8.3273e-04
Loss = 1.3735e-01, PNorm = 47.9106, GNorm = 1.0253, lr_0 = 8.3216e-04
Loss = 1.6255e-01, PNorm = 47.9251, GNorm = 1.0521, lr_0 = 8.3159e-04
Loss = 1.7396e-01, PNorm = 47.9463, GNorm = 1.6835, lr_0 = 8.3102e-04
Loss = 1.5538e-01, PNorm = 47.9714, GNorm = 0.6739, lr_0 = 8.3045e-04
Loss = 1.6516e-01, PNorm = 47.9882, GNorm = 1.1731, lr_0 = 8.2988e-04
Loss = 1.2084e-01, PNorm = 48.0019, GNorm = 1.2814, lr_0 = 8.2932e-04
Loss = 1.4370e-01, PNorm = 48.0190, GNorm = 1.1257, lr_0 = 8.2875e-04
Loss = 1.6635e-01, PNorm = 48.0410, GNorm = 2.1175, lr_0 = 8.2818e-04
Loss = 1.4531e-01, PNorm = 48.0654, GNorm = 1.6766, lr_0 = 8.2761e-04
Loss = 1.5815e-01, PNorm = 48.0909, GNorm = 1.4312, lr_0 = 8.2705e-04
Loss = 1.4665e-01, PNorm = 48.1154, GNorm = 1.1187, lr_0 = 8.2648e-04
Loss = 1.5129e-01, PNorm = 48.1317, GNorm = 0.7491, lr_0 = 8.2591e-04
Loss = 1.4455e-01, PNorm = 48.1523, GNorm = 0.7712, lr_0 = 8.2535e-04
Loss = 1.4638e-01, PNorm = 48.1763, GNorm = 2.1335, lr_0 = 8.2478e-04
Loss = 1.6129e-01, PNorm = 48.1929, GNorm = 1.7693, lr_0 = 8.2422e-04
Loss = 1.5082e-01, PNorm = 48.2087, GNorm = 1.1879, lr_0 = 8.2365e-04
Loss = 1.5459e-01, PNorm = 48.2252, GNorm = 2.2134, lr_0 = 8.2309e-04
Loss = 1.7055e-01, PNorm = 48.2408, GNorm = 3.3035, lr_0 = 8.2252e-04
Loss = 1.3499e-01, PNorm = 48.2641, GNorm = 1.4798, lr_0 = 8.2196e-04
Loss = 1.5601e-01, PNorm = 48.2844, GNorm = 2.5270, lr_0 = 8.2140e-04
Loss = 1.7112e-01, PNorm = 48.2992, GNorm = 1.5980, lr_0 = 8.2084e-04
Loss = 1.5441e-01, PNorm = 48.3262, GNorm = 0.8116, lr_0 = 8.2027e-04
Loss = 1.6424e-01, PNorm = 48.3527, GNorm = 1.5556, lr_0 = 8.1971e-04
Loss = 1.4196e-01, PNorm = 48.3723, GNorm = 0.7813, lr_0 = 8.1915e-04
Loss = 1.3328e-01, PNorm = 48.3867, GNorm = 1.1176, lr_0 = 8.1859e-04
Loss = 1.3906e-01, PNorm = 48.3960, GNorm = 2.5743, lr_0 = 8.1803e-04
Loss = 1.4888e-01, PNorm = 48.4102, GNorm = 1.0102, lr_0 = 8.1747e-04
Loss = 1.5319e-01, PNorm = 48.4317, GNorm = 1.7605, lr_0 = 8.1691e-04
Loss = 1.5821e-01, PNorm = 48.4564, GNorm = 0.8124, lr_0 = 8.1635e-04
Loss = 1.7400e-01, PNorm = 48.4735, GNorm = 1.0658, lr_0 = 8.1579e-04
Loss = 1.4776e-01, PNorm = 48.4979, GNorm = 0.7745, lr_0 = 8.1523e-04
Loss = 1.3748e-01, PNorm = 48.5199, GNorm = 1.3596, lr_0 = 8.1467e-04
Loss = 1.6324e-01, PNorm = 48.5327, GNorm = 0.7720, lr_0 = 8.1411e-04
Loss = 1.5861e-01, PNorm = 48.5488, GNorm = 2.7330, lr_0 = 8.1355e-04
Loss = 1.5270e-01, PNorm = 48.5786, GNorm = 2.0902, lr_0 = 8.1300e-04
Loss = 1.3592e-01, PNorm = 48.5930, GNorm = 1.3284, lr_0 = 8.1244e-04
Loss = 1.4130e-01, PNorm = 48.6090, GNorm = 0.7374, lr_0 = 8.1188e-04
Loss = 1.5187e-01, PNorm = 48.6295, GNorm = 1.3286, lr_0 = 8.1133e-04
Loss = 1.4904e-01, PNorm = 48.6460, GNorm = 0.6619, lr_0 = 8.1077e-04
Loss = 1.4288e-01, PNorm = 48.6665, GNorm = 1.7295, lr_0 = 8.1022e-04
Loss = 1.3834e-01, PNorm = 48.6889, GNorm = 0.8204, lr_0 = 8.0966e-04
Loss = 1.4998e-01, PNorm = 48.7019, GNorm = 1.4834, lr_0 = 8.0911e-04
Loss = 1.4730e-01, PNorm = 48.7196, GNorm = 3.1367, lr_0 = 8.0855e-04
Loss = 1.5066e-01, PNorm = 48.7392, GNorm = 0.8361, lr_0 = 8.0800e-04
Loss = 1.5856e-01, PNorm = 48.7624, GNorm = 1.8907, lr_0 = 8.0745e-04
Loss = 1.8113e-01, PNorm = 48.7871, GNorm = 1.1288, lr_0 = 8.0689e-04
Loss = 1.4476e-01, PNorm = 48.8162, GNorm = 2.1162, lr_0 = 8.0634e-04
Loss = 1.5716e-01, PNorm = 48.8335, GNorm = 0.9977, lr_0 = 8.0579e-04
Loss = 1.5563e-01, PNorm = 48.8468, GNorm = 1.0220, lr_0 = 8.0523e-04
Loss = 1.6103e-01, PNorm = 48.8644, GNorm = 0.7534, lr_0 = 8.0468e-04
Loss = 1.6828e-01, PNorm = 48.8792, GNorm = 0.8165, lr_0 = 8.0413e-04
Loss = 1.5857e-01, PNorm = 48.8949, GNorm = 2.2581, lr_0 = 8.0358e-04
Loss = 1.3054e-01, PNorm = 48.9171, GNorm = 0.8022, lr_0 = 8.0303e-04
Loss = 1.7117e-01, PNorm = 48.9320, GNorm = 1.6861, lr_0 = 8.0248e-04
Loss = 1.3312e-01, PNorm = 48.9466, GNorm = 1.0071, lr_0 = 8.0193e-04
Loss = 1.4415e-01, PNorm = 48.9626, GNorm = 3.3489, lr_0 = 8.0138e-04
Loss = 1.4350e-01, PNorm = 48.9799, GNorm = 0.8673, lr_0 = 8.0083e-04
Loss = 1.6728e-01, PNorm = 49.0013, GNorm = 1.2706, lr_0 = 8.0028e-04
Loss = 1.4461e-01, PNorm = 49.0255, GNorm = 0.8038, lr_0 = 7.9974e-04
Loss = 1.4782e-01, PNorm = 49.0505, GNorm = 0.9857, lr_0 = 7.9919e-04
Loss = 1.6386e-01, PNorm = 49.0741, GNorm = 0.7673, lr_0 = 7.9864e-04
Loss = 1.4320e-01, PNorm = 49.0968, GNorm = 2.1758, lr_0 = 7.9809e-04
Loss = 1.5130e-01, PNorm = 49.1172, GNorm = 1.4249, lr_0 = 7.9755e-04
Loss = 1.4303e-01, PNorm = 49.1353, GNorm = 0.8099, lr_0 = 7.9700e-04
Loss = 1.4232e-01, PNorm = 49.1471, GNorm = 0.9722, lr_0 = 7.9645e-04
Loss = 1.7158e-01, PNorm = 49.1627, GNorm = 0.6389, lr_0 = 7.9591e-04
Loss = 1.3646e-01, PNorm = 49.1807, GNorm = 0.9419, lr_0 = 7.9536e-04
Loss = 1.5496e-01, PNorm = 49.1939, GNorm = 1.1795, lr_0 = 7.9482e-04
Loss = 1.4748e-01, PNorm = 49.2076, GNorm = 0.8233, lr_0 = 7.9427e-04
Loss = 1.5194e-01, PNorm = 49.2244, GNorm = 0.5799, lr_0 = 7.9373e-04
Loss = 1.3941e-01, PNorm = 49.2380, GNorm = 1.2451, lr_0 = 7.9319e-04
Loss = 1.4404e-01, PNorm = 49.2609, GNorm = 0.8374, lr_0 = 7.9264e-04
Loss = 1.6143e-01, PNorm = 49.2823, GNorm = 2.7249, lr_0 = 7.9210e-04
Loss = 1.5965e-01, PNorm = 49.3058, GNorm = 0.6085, lr_0 = 7.9156e-04
Loss = 1.5485e-01, PNorm = 49.3429, GNorm = 1.7907, lr_0 = 7.9101e-04
Loss = 1.5212e-01, PNorm = 49.3639, GNorm = 1.7194, lr_0 = 7.9047e-04
Loss = 1.4845e-01, PNorm = 49.3785, GNorm = 1.1935, lr_0 = 7.8993e-04
Loss = 1.4942e-01, PNorm = 49.3925, GNorm = 1.3665, lr_0 = 7.8939e-04
Loss = 1.4663e-01, PNorm = 49.4078, GNorm = 1.1883, lr_0 = 7.8885e-04
Loss = 1.5323e-01, PNorm = 49.4278, GNorm = 1.5312, lr_0 = 7.8831e-04
Loss = 1.6399e-01, PNorm = 49.4530, GNorm = 1.3339, lr_0 = 7.8777e-04
Loss = 1.4047e-01, PNorm = 49.4740, GNorm = 1.3391, lr_0 = 7.8723e-04
Loss = 1.4036e-01, PNorm = 49.4966, GNorm = 0.6219, lr_0 = 7.8669e-04
Loss = 1.2626e-01, PNorm = 49.5136, GNorm = 0.7388, lr_0 = 7.8615e-04
Loss = 1.4601e-01, PNorm = 49.5257, GNorm = 1.1238, lr_0 = 7.8561e-04
Loss = 1.3713e-01, PNorm = 49.5398, GNorm = 0.7334, lr_0 = 7.8507e-04
Loss = 1.6688e-01, PNorm = 49.5625, GNorm = 1.0069, lr_0 = 7.8454e-04
Loss = 1.5153e-01, PNorm = 49.5851, GNorm = 1.0708, lr_0 = 7.8400e-04
Loss = 1.4364e-01, PNorm = 49.6079, GNorm = 1.2350, lr_0 = 7.8346e-04
Loss = 1.3832e-01, PNorm = 49.6261, GNorm = 0.7759, lr_0 = 7.8293e-04
Loss = 1.5447e-01, PNorm = 49.6464, GNorm = 1.1236, lr_0 = 7.8239e-04
Loss = 1.6710e-01, PNorm = 49.6603, GNorm = 0.7711, lr_0 = 7.8185e-04
Loss = 1.7304e-01, PNorm = 49.6837, GNorm = 2.8807, lr_0 = 7.8132e-04
Validation mae = 0.460039
Epoch 5
Loss = 1.3527e-01, PNorm = 49.7017, GNorm = 0.8693, lr_0 = 7.8078e-04
Loss = 1.3945e-01, PNorm = 49.7245, GNorm = 1.2894, lr_0 = 7.8025e-04
Loss = 1.3974e-01, PNorm = 49.7449, GNorm = 2.2534, lr_0 = 7.7971e-04
Loss = 1.4522e-01, PNorm = 49.7671, GNorm = 1.1797, lr_0 = 7.7918e-04
Loss = 1.7194e-01, PNorm = 49.7879, GNorm = 1.5211, lr_0 = 7.7864e-04
Loss = 1.5326e-01, PNorm = 49.8191, GNorm = 0.8409, lr_0 = 7.7811e-04
Loss = 1.4881e-01, PNorm = 49.8406, GNorm = 2.7701, lr_0 = 7.7758e-04
Loss = 1.2956e-01, PNorm = 49.8628, GNorm = 1.5883, lr_0 = 7.7705e-04
Loss = 1.3928e-01, PNorm = 49.8896, GNorm = 0.9287, lr_0 = 7.7651e-04
Loss = 1.4601e-01, PNorm = 49.9090, GNorm = 1.5135, lr_0 = 7.7598e-04
Loss = 1.1629e-01, PNorm = 49.9204, GNorm = 1.4189, lr_0 = 7.7545e-04
Loss = 1.6193e-01, PNorm = 49.9421, GNorm = 0.8122, lr_0 = 7.7492e-04
Loss = 1.3824e-01, PNorm = 49.9678, GNorm = 0.7588, lr_0 = 7.7439e-04
Loss = 1.3158e-01, PNorm = 49.9904, GNorm = 1.2587, lr_0 = 7.7386e-04
Loss = 1.4218e-01, PNorm = 50.0148, GNorm = 0.8668, lr_0 = 7.7333e-04
Loss = 1.3098e-01, PNorm = 50.0345, GNorm = 0.7955, lr_0 = 7.7280e-04
Loss = 1.5597e-01, PNorm = 50.0557, GNorm = 2.8757, lr_0 = 7.7227e-04
Loss = 1.6650e-01, PNorm = 50.0792, GNorm = 0.8253, lr_0 = 7.7174e-04
Loss = 1.8852e-01, PNorm = 50.1048, GNorm = 3.6840, lr_0 = 7.7121e-04
Loss = 1.6142e-01, PNorm = 50.1302, GNorm = 1.6047, lr_0 = 7.7068e-04
Loss = 1.4221e-01, PNorm = 50.1632, GNorm = 0.7753, lr_0 = 7.7015e-04
Loss = 1.1512e-01, PNorm = 50.1810, GNorm = 0.5041, lr_0 = 7.6963e-04
Loss = 1.3329e-01, PNorm = 50.2002, GNorm = 2.0994, lr_0 = 7.6910e-04
Loss = 1.3356e-01, PNorm = 50.2170, GNorm = 1.3485, lr_0 = 7.6857e-04
Loss = 1.5484e-01, PNorm = 50.2343, GNorm = 1.2418, lr_0 = 7.6805e-04
Loss = 1.2406e-01, PNorm = 50.2500, GNorm = 1.3753, lr_0 = 7.6752e-04
Loss = 1.3602e-01, PNorm = 50.2658, GNorm = 0.8673, lr_0 = 7.6699e-04
Loss = 1.3097e-01, PNorm = 50.2803, GNorm = 0.7996, lr_0 = 7.6647e-04
Loss = 1.3446e-01, PNorm = 50.2993, GNorm = 1.1830, lr_0 = 7.6594e-04
Loss = 1.3508e-01, PNorm = 50.3213, GNorm = 0.8354, lr_0 = 7.6542e-04
Loss = 1.4304e-01, PNorm = 50.3365, GNorm = 1.7084, lr_0 = 7.6489e-04
Loss = 1.5022e-01, PNorm = 50.3574, GNorm = 2.7828, lr_0 = 7.6437e-04
Loss = 1.4721e-01, PNorm = 50.3853, GNorm = 1.6432, lr_0 = 7.6385e-04
Loss = 1.4355e-01, PNorm = 50.4029, GNorm = 1.1598, lr_0 = 7.6332e-04
Loss = 1.3251e-01, PNorm = 50.4143, GNorm = 0.5546, lr_0 = 7.6280e-04
Loss = 1.4492e-01, PNorm = 50.4292, GNorm = 1.1847, lr_0 = 7.6228e-04
Loss = 1.4111e-01, PNorm = 50.4455, GNorm = 1.0961, lr_0 = 7.6176e-04
Loss = 1.3473e-01, PNorm = 50.4647, GNorm = 1.4309, lr_0 = 7.6123e-04
Loss = 1.5017e-01, PNorm = 50.4885, GNorm = 1.4331, lr_0 = 7.6071e-04
Loss = 1.3215e-01, PNorm = 50.5114, GNorm = 0.6744, lr_0 = 7.6019e-04
Loss = 1.3673e-01, PNorm = 50.5261, GNorm = 1.8098, lr_0 = 7.5967e-04
Loss = 1.2872e-01, PNorm = 50.5400, GNorm = 0.8475, lr_0 = 7.5915e-04
Loss = 1.2680e-01, PNorm = 50.5527, GNorm = 1.3326, lr_0 = 7.5863e-04
Loss = 1.2400e-01, PNorm = 50.5659, GNorm = 0.6190, lr_0 = 7.5811e-04
Loss = 1.4709e-01, PNorm = 50.5793, GNorm = 0.7379, lr_0 = 7.5759e-04
Loss = 1.5854e-01, PNorm = 50.5971, GNorm = 2.5051, lr_0 = 7.5707e-04
Loss = 1.5728e-01, PNorm = 50.6167, GNorm = 0.9740, lr_0 = 7.5655e-04
Loss = 1.2618e-01, PNorm = 50.6396, GNorm = 1.4951, lr_0 = 7.5603e-04
Loss = 1.3049e-01, PNorm = 50.6611, GNorm = 0.7721, lr_0 = 7.5552e-04
Loss = 1.5050e-01, PNorm = 50.6737, GNorm = 1.5747, lr_0 = 7.5500e-04
Loss = 1.4331e-01, PNorm = 50.6927, GNorm = 1.2853, lr_0 = 7.5448e-04
Loss = 1.2996e-01, PNorm = 50.7118, GNorm = 1.0131, lr_0 = 7.5397e-04
Loss = 1.3121e-01, PNorm = 50.7314, GNorm = 2.3765, lr_0 = 7.5345e-04
Loss = 1.2278e-01, PNorm = 50.7463, GNorm = 1.3077, lr_0 = 7.5293e-04
Loss = 1.2998e-01, PNorm = 50.7637, GNorm = 1.4100, lr_0 = 7.5242e-04
Loss = 1.4160e-01, PNorm = 50.7822, GNorm = 1.3016, lr_0 = 7.5190e-04
Loss = 1.3758e-01, PNorm = 50.8016, GNorm = 0.7517, lr_0 = 7.5139e-04
Loss = 1.5057e-01, PNorm = 50.8231, GNorm = 0.9377, lr_0 = 7.5087e-04
Loss = 1.3943e-01, PNorm = 50.8398, GNorm = 2.0047, lr_0 = 7.5036e-04
Loss = 1.3980e-01, PNorm = 50.8581, GNorm = 1.1949, lr_0 = 7.4984e-04
Loss = 1.4208e-01, PNorm = 50.8719, GNorm = 1.1790, lr_0 = 7.4933e-04
Loss = 1.5688e-01, PNorm = 50.8913, GNorm = 0.5567, lr_0 = 7.4882e-04
Loss = 1.4797e-01, PNorm = 50.9129, GNorm = 0.7628, lr_0 = 7.4830e-04
Loss = 1.2901e-01, PNorm = 50.9319, GNorm = 2.9625, lr_0 = 7.4779e-04
Loss = 1.4320e-01, PNorm = 50.9551, GNorm = 1.8115, lr_0 = 7.4728e-04
Loss = 1.3284e-01, PNorm = 50.9737, GNorm = 0.6940, lr_0 = 7.4677e-04
Loss = 1.3197e-01, PNorm = 50.9896, GNorm = 0.7336, lr_0 = 7.4625e-04
Loss = 1.4004e-01, PNorm = 51.0114, GNorm = 1.2529, lr_0 = 7.4574e-04
Loss = 1.2066e-01, PNorm = 51.0360, GNorm = 1.0547, lr_0 = 7.4523e-04
Loss = 1.2889e-01, PNorm = 51.0482, GNorm = 0.8388, lr_0 = 7.4472e-04
Loss = 1.1924e-01, PNorm = 51.0602, GNorm = 1.5091, lr_0 = 7.4421e-04
Loss = 1.2701e-01, PNorm = 51.0714, GNorm = 1.8878, lr_0 = 7.4370e-04
Loss = 1.3731e-01, PNorm = 51.0856, GNorm = 1.0462, lr_0 = 7.4319e-04
Loss = 1.3059e-01, PNorm = 51.1028, GNorm = 1.1432, lr_0 = 7.4268e-04
Loss = 1.3556e-01, PNorm = 51.1233, GNorm = 1.6450, lr_0 = 7.4217e-04
Loss = 1.4605e-01, PNorm = 51.1437, GNorm = 0.5256, lr_0 = 7.4167e-04
Loss = 1.4503e-01, PNorm = 51.1613, GNorm = 0.7406, lr_0 = 7.4116e-04
Loss = 1.5112e-01, PNorm = 51.1739, GNorm = 0.8626, lr_0 = 7.4065e-04
Loss = 1.5185e-01, PNorm = 51.2024, GNorm = 0.7844, lr_0 = 7.4014e-04
Loss = 1.3562e-01, PNorm = 51.2193, GNorm = 0.7980, lr_0 = 7.3964e-04
Loss = 1.5194e-01, PNorm = 51.2325, GNorm = 0.7202, lr_0 = 7.3913e-04
Loss = 1.3907e-01, PNorm = 51.2492, GNorm = 0.5593, lr_0 = 7.3862e-04
Loss = 1.4718e-01, PNorm = 51.2689, GNorm = 1.4908, lr_0 = 7.3812e-04
Loss = 1.5412e-01, PNorm = 51.2876, GNorm = 0.7797, lr_0 = 7.3761e-04
Loss = 1.1818e-01, PNorm = 51.3067, GNorm = 0.8487, lr_0 = 7.3711e-04
Loss = 1.3740e-01, PNorm = 51.3227, GNorm = 0.6950, lr_0 = 7.3660e-04
Loss = 1.3617e-01, PNorm = 51.3345, GNorm = 0.7093, lr_0 = 7.3610e-04
Loss = 1.3896e-01, PNorm = 51.3569, GNorm = 0.7247, lr_0 = 7.3559e-04
Loss = 1.4989e-01, PNorm = 51.3750, GNorm = 0.8458, lr_0 = 7.3509e-04
Loss = 1.3324e-01, PNorm = 51.3946, GNorm = 0.5385, lr_0 = 7.3458e-04
Loss = 1.3073e-01, PNorm = 51.4137, GNorm = 0.9795, lr_0 = 7.3408e-04
Loss = 1.2164e-01, PNorm = 51.4225, GNorm = 1.6101, lr_0 = 7.3358e-04
Loss = 1.4984e-01, PNorm = 51.4438, GNorm = 1.8171, lr_0 = 7.3308e-04
Loss = 1.2487e-01, PNorm = 51.4617, GNorm = 1.0092, lr_0 = 7.3257e-04
Loss = 1.2697e-01, PNorm = 51.4766, GNorm = 0.7445, lr_0 = 7.3207e-04
Loss = 1.2603e-01, PNorm = 51.4968, GNorm = 0.5885, lr_0 = 7.3157e-04
Loss = 1.3386e-01, PNorm = 51.5095, GNorm = 1.9258, lr_0 = 7.3107e-04
Loss = 1.3604e-01, PNorm = 51.5183, GNorm = 0.8374, lr_0 = 7.3057e-04
Loss = 1.2538e-01, PNorm = 51.5355, GNorm = 0.8279, lr_0 = 7.3007e-04
Loss = 1.3951e-01, PNorm = 51.5526, GNorm = 0.7414, lr_0 = 7.2957e-04
Loss = 1.3974e-01, PNorm = 51.5659, GNorm = 1.0383, lr_0 = 7.2907e-04
Loss = 1.3952e-01, PNorm = 51.5767, GNorm = 1.3085, lr_0 = 7.2857e-04
Loss = 1.4316e-01, PNorm = 51.5888, GNorm = 1.3782, lr_0 = 7.2807e-04
Loss = 1.7673e-01, PNorm = 51.6020, GNorm = 1.7661, lr_0 = 7.2757e-04
Loss = 1.4384e-01, PNorm = 51.6238, GNorm = 1.3201, lr_0 = 7.2707e-04
Loss = 1.5154e-01, PNorm = 51.6569, GNorm = 1.6662, lr_0 = 7.2657e-04
Loss = 1.3803e-01, PNorm = 51.6835, GNorm = 0.5739, lr_0 = 7.2608e-04
Loss = 1.2407e-01, PNorm = 51.7086, GNorm = 0.5777, lr_0 = 7.2558e-04
Loss = 1.5012e-01, PNorm = 51.7298, GNorm = 0.9707, lr_0 = 7.2508e-04
Loss = 1.3210e-01, PNorm = 51.7384, GNorm = 0.9937, lr_0 = 7.2458e-04
Loss = 1.2857e-01, PNorm = 51.7531, GNorm = 1.5687, lr_0 = 7.2409e-04
Loss = 1.4873e-01, PNorm = 51.7617, GNorm = 2.4381, lr_0 = 7.2359e-04
Loss = 1.3535e-01, PNorm = 51.7747, GNorm = 1.0460, lr_0 = 7.2310e-04
Loss = 1.2086e-01, PNorm = 51.7855, GNorm = 1.7854, lr_0 = 7.2260e-04
Loss = 1.4223e-01, PNorm = 51.8060, GNorm = 1.1164, lr_0 = 7.2211e-04
Loss = 1.4323e-01, PNorm = 51.8211, GNorm = 0.4188, lr_0 = 7.2161e-04
Loss = 1.3362e-01, PNorm = 51.8335, GNorm = 1.2826, lr_0 = 7.2112e-04
Loss = 1.7147e-01, PNorm = 51.8500, GNorm = 1.5227, lr_0 = 7.2062e-04
Loss = 1.8992e-01, PNorm = 51.8751, GNorm = 0.7068, lr_0 = 7.2013e-04
Loss = 1.8269e-01, PNorm = 51.9044, GNorm = 0.9032, lr_0 = 7.1964e-04
Validation mae = 0.452444
Epoch 6
Loss = 1.4273e-01, PNorm = 51.9328, GNorm = 2.3988, lr_0 = 7.1914e-04
Loss = 1.4904e-01, PNorm = 51.9551, GNorm = 1.2606, lr_0 = 7.1865e-04
Loss = 1.3859e-01, PNorm = 51.9775, GNorm = 1.0070, lr_0 = 7.1816e-04
Loss = 1.2838e-01, PNorm = 52.0058, GNorm = 0.8292, lr_0 = 7.1767e-04
Loss = 1.1109e-01, PNorm = 52.0275, GNorm = 0.6907, lr_0 = 7.1717e-04
Loss = 1.2125e-01, PNorm = 52.0418, GNorm = 0.7652, lr_0 = 7.1668e-04
Loss = 1.2665e-01, PNorm = 52.0583, GNorm = 0.5621, lr_0 = 7.1619e-04
Loss = 1.1010e-01, PNorm = 52.0714, GNorm = 0.7482, lr_0 = 7.1570e-04
Loss = 1.1797e-01, PNorm = 52.0897, GNorm = 0.7838, lr_0 = 7.1521e-04
Loss = 1.1825e-01, PNorm = 52.1127, GNorm = 0.7329, lr_0 = 7.1472e-04
Loss = 1.3007e-01, PNorm = 52.1303, GNorm = 1.4247, lr_0 = 7.1423e-04
Loss = 1.2807e-01, PNorm = 52.1557, GNorm = 0.7436, lr_0 = 7.1374e-04
Loss = 1.5513e-01, PNorm = 52.1760, GNorm = 1.1436, lr_0 = 7.1325e-04
Loss = 1.2656e-01, PNorm = 52.1947, GNorm = 0.5923, lr_0 = 7.1277e-04
Loss = 1.3572e-01, PNorm = 52.2111, GNorm = 0.9946, lr_0 = 7.1228e-04
Loss = 1.2821e-01, PNorm = 52.2292, GNorm = 0.7451, lr_0 = 7.1179e-04
Loss = 1.4158e-01, PNorm = 52.2468, GNorm = 0.7453, lr_0 = 7.1130e-04
Loss = 1.1703e-01, PNorm = 52.2586, GNorm = 1.1240, lr_0 = 7.1081e-04
Loss = 1.1493e-01, PNorm = 52.2695, GNorm = 0.8028, lr_0 = 7.1033e-04
Loss = 1.1808e-01, PNorm = 52.2838, GNorm = 0.9823, lr_0 = 7.0984e-04
Loss = 1.3460e-01, PNorm = 52.2997, GNorm = 0.5679, lr_0 = 7.0935e-04
Loss = 1.1666e-01, PNorm = 52.3095, GNorm = 0.7339, lr_0 = 7.0887e-04
Loss = 1.2714e-01, PNorm = 52.3228, GNorm = 1.0894, lr_0 = 7.0838e-04
Loss = 1.3010e-01, PNorm = 52.3373, GNorm = 0.7987, lr_0 = 7.0790e-04
Loss = 1.2494e-01, PNorm = 52.3525, GNorm = 0.7590, lr_0 = 7.0741e-04
Loss = 1.3633e-01, PNorm = 52.3736, GNorm = 1.3003, lr_0 = 7.0693e-04
Loss = 1.2696e-01, PNorm = 52.3960, GNorm = 1.3930, lr_0 = 7.0644e-04
Loss = 1.3930e-01, PNorm = 52.4152, GNorm = 1.6506, lr_0 = 7.0596e-04
Loss = 1.3676e-01, PNorm = 52.4344, GNorm = 2.1101, lr_0 = 7.0548e-04
Loss = 1.2869e-01, PNorm = 52.4579, GNorm = 0.5150, lr_0 = 7.0499e-04
Loss = 1.2536e-01, PNorm = 52.4788, GNorm = 1.7274, lr_0 = 7.0451e-04
Loss = 1.2263e-01, PNorm = 52.4996, GNorm = 1.3703, lr_0 = 7.0403e-04
Loss = 1.3492e-01, PNorm = 52.5167, GNorm = 0.9309, lr_0 = 7.0354e-04
Loss = 1.5752e-01, PNorm = 52.5313, GNorm = 0.9945, lr_0 = 7.0306e-04
Loss = 1.4698e-01, PNorm = 52.5521, GNorm = 0.5602, lr_0 = 7.0258e-04
Loss = 1.4141e-01, PNorm = 52.5721, GNorm = 1.0714, lr_0 = 7.0210e-04
Loss = 1.2757e-01, PNorm = 52.5894, GNorm = 0.8525, lr_0 = 7.0162e-04
Loss = 1.3008e-01, PNorm = 52.6068, GNorm = 1.7885, lr_0 = 7.0114e-04
Loss = 1.2266e-01, PNorm = 52.6267, GNorm = 1.9653, lr_0 = 7.0066e-04
Loss = 1.2335e-01, PNorm = 52.6419, GNorm = 0.9096, lr_0 = 7.0018e-04
Loss = 1.1644e-01, PNorm = 52.6571, GNorm = 1.1891, lr_0 = 6.9970e-04
Loss = 1.2387e-01, PNorm = 52.6774, GNorm = 1.9232, lr_0 = 6.9922e-04
Loss = 1.2334e-01, PNorm = 52.6930, GNorm = 1.0629, lr_0 = 6.9874e-04
Loss = 1.2582e-01, PNorm = 52.7082, GNorm = 1.2488, lr_0 = 6.9826e-04
Loss = 1.2325e-01, PNorm = 52.7262, GNorm = 0.9220, lr_0 = 6.9778e-04
Loss = 1.3505e-01, PNorm = 52.7464, GNorm = 0.8297, lr_0 = 6.9730e-04
Loss = 1.3158e-01, PNorm = 52.7727, GNorm = 0.8694, lr_0 = 6.9683e-04
Loss = 1.0633e-01, PNorm = 52.7953, GNorm = 0.9613, lr_0 = 6.9635e-04
Loss = 1.1788e-01, PNorm = 52.8098, GNorm = 0.5151, lr_0 = 6.9587e-04
Loss = 1.2408e-01, PNorm = 52.8248, GNorm = 1.0702, lr_0 = 6.9540e-04
Loss = 1.1897e-01, PNorm = 52.8350, GNorm = 0.9122, lr_0 = 6.9492e-04
Loss = 1.3928e-01, PNorm = 52.8505, GNorm = 1.9845, lr_0 = 6.9444e-04
Loss = 1.4711e-01, PNorm = 52.8687, GNorm = 1.5492, lr_0 = 6.9397e-04
Loss = 1.4597e-01, PNorm = 52.8994, GNorm = 0.8125, lr_0 = 6.9349e-04
Loss = 1.3653e-01, PNorm = 52.9226, GNorm = 0.9349, lr_0 = 6.9302e-04
Loss = 1.1620e-01, PNorm = 52.9402, GNorm = 1.6569, lr_0 = 6.9254e-04
Loss = 1.3439e-01, PNorm = 52.9536, GNorm = 1.6733, lr_0 = 6.9207e-04
Loss = 1.4909e-01, PNorm = 52.9753, GNorm = 0.7297, lr_0 = 6.9159e-04
Loss = 1.3344e-01, PNorm = 52.9946, GNorm = 0.6638, lr_0 = 6.9112e-04
Loss = 1.2703e-01, PNorm = 53.0149, GNorm = 0.8284, lr_0 = 6.9065e-04
Loss = 1.3977e-01, PNorm = 53.0390, GNorm = 1.4514, lr_0 = 6.9017e-04
Loss = 1.4697e-01, PNorm = 53.0590, GNorm = 0.8751, lr_0 = 6.8970e-04
Loss = 1.2543e-01, PNorm = 53.0797, GNorm = 0.9736, lr_0 = 6.8923e-04
Loss = 1.3440e-01, PNorm = 53.1050, GNorm = 1.0264, lr_0 = 6.8876e-04
Loss = 1.3143e-01, PNorm = 53.1259, GNorm = 0.7499, lr_0 = 6.8828e-04
Loss = 1.3115e-01, PNorm = 53.1457, GNorm = 1.0283, lr_0 = 6.8781e-04
Loss = 1.4953e-01, PNorm = 53.1624, GNorm = 1.0348, lr_0 = 6.8734e-04
Loss = 1.3272e-01, PNorm = 53.1767, GNorm = 0.7986, lr_0 = 6.8687e-04
Loss = 1.1277e-01, PNorm = 53.1946, GNorm = 0.8699, lr_0 = 6.8640e-04
Loss = 1.2825e-01, PNorm = 53.2056, GNorm = 0.5453, lr_0 = 6.8593e-04
Loss = 1.2838e-01, PNorm = 53.2176, GNorm = 0.9108, lr_0 = 6.8546e-04
Loss = 1.2365e-01, PNorm = 53.2299, GNorm = 1.3983, lr_0 = 6.8499e-04
Loss = 1.1789e-01, PNorm = 53.2424, GNorm = 1.1668, lr_0 = 6.8452e-04
Loss = 1.4829e-01, PNorm = 53.2579, GNorm = 0.8591, lr_0 = 6.8405e-04
Loss = 1.3858e-01, PNorm = 53.2774, GNorm = 0.7005, lr_0 = 6.8358e-04
Loss = 1.2449e-01, PNorm = 53.2984, GNorm = 0.9184, lr_0 = 6.8312e-04
Loss = 1.2456e-01, PNorm = 53.3128, GNorm = 1.7384, lr_0 = 6.8265e-04
Loss = 1.5577e-01, PNorm = 53.3320, GNorm = 0.9516, lr_0 = 6.8218e-04
Loss = 1.1111e-01, PNorm = 53.3533, GNorm = 0.8068, lr_0 = 6.8171e-04
Loss = 1.3394e-01, PNorm = 53.3732, GNorm = 1.0422, lr_0 = 6.8125e-04
Loss = 1.2020e-01, PNorm = 53.3907, GNorm = 0.6959, lr_0 = 6.8078e-04
Loss = 1.1056e-01, PNorm = 53.4118, GNorm = 0.6021, lr_0 = 6.8031e-04
Loss = 1.2912e-01, PNorm = 53.4311, GNorm = 0.9483, lr_0 = 6.7985e-04
Loss = 1.4015e-01, PNorm = 53.4488, GNorm = 0.7528, lr_0 = 6.7938e-04
Loss = 1.2171e-01, PNorm = 53.4700, GNorm = 0.5440, lr_0 = 6.7892e-04
Loss = 1.2254e-01, PNorm = 53.4863, GNorm = 0.7028, lr_0 = 6.7845e-04
Loss = 1.2277e-01, PNorm = 53.4997, GNorm = 1.2585, lr_0 = 6.7799e-04
Loss = 1.2564e-01, PNorm = 53.5128, GNorm = 1.2864, lr_0 = 6.7752e-04
Loss = 1.4271e-01, PNorm = 53.5420, GNorm = 1.2078, lr_0 = 6.7706e-04
Loss = 1.3647e-01, PNorm = 53.5640, GNorm = 0.8446, lr_0 = 6.7659e-04
Loss = 1.4464e-01, PNorm = 53.5770, GNorm = 0.9214, lr_0 = 6.7613e-04
Loss = 1.3867e-01, PNorm = 53.5910, GNorm = 0.7855, lr_0 = 6.7567e-04
Loss = 1.2304e-01, PNorm = 53.6093, GNorm = 1.6915, lr_0 = 6.7520e-04
Loss = 1.2892e-01, PNorm = 53.6252, GNorm = 0.9121, lr_0 = 6.7474e-04
Loss = 1.3027e-01, PNorm = 53.6417, GNorm = 0.7017, lr_0 = 6.7428e-04
Loss = 1.2653e-01, PNorm = 53.6603, GNorm = 0.9078, lr_0 = 6.7382e-04
Loss = 1.1095e-01, PNorm = 53.6798, GNorm = 0.5544, lr_0 = 6.7335e-04
Loss = 1.2023e-01, PNorm = 53.6946, GNorm = 1.2044, lr_0 = 6.7289e-04
Loss = 1.3077e-01, PNorm = 53.7025, GNorm = 1.2216, lr_0 = 6.7243e-04
Loss = 1.2691e-01, PNorm = 53.7090, GNorm = 0.7393, lr_0 = 6.7197e-04
Loss = 1.0844e-01, PNorm = 53.7219, GNorm = 1.3515, lr_0 = 6.7151e-04
Loss = 1.3324e-01, PNorm = 53.7378, GNorm = 0.7870, lr_0 = 6.7105e-04
Loss = 1.4243e-01, PNorm = 53.7520, GNorm = 0.6689, lr_0 = 6.7059e-04
Loss = 1.2653e-01, PNorm = 53.7698, GNorm = 0.8396, lr_0 = 6.7013e-04
Loss = 1.2687e-01, PNorm = 53.7882, GNorm = 1.5495, lr_0 = 6.6967e-04
Loss = 1.1746e-01, PNorm = 53.8024, GNorm = 1.3917, lr_0 = 6.6921e-04
Loss = 1.3343e-01, PNorm = 53.8123, GNorm = 0.7510, lr_0 = 6.6876e-04
Loss = 1.2209e-01, PNorm = 53.8264, GNorm = 0.8805, lr_0 = 6.6830e-04
Loss = 1.4422e-01, PNorm = 53.8444, GNorm = 0.9852, lr_0 = 6.6784e-04
Loss = 1.4129e-01, PNorm = 53.8570, GNorm = 2.9516, lr_0 = 6.6738e-04
Loss = 1.2786e-01, PNorm = 53.8725, GNorm = 1.1152, lr_0 = 6.6693e-04
Loss = 1.5117e-01, PNorm = 53.8925, GNorm = 0.8795, lr_0 = 6.6647e-04
Loss = 1.2726e-01, PNorm = 53.9110, GNorm = 1.2049, lr_0 = 6.6601e-04
Loss = 1.4245e-01, PNorm = 53.9267, GNorm = 0.9842, lr_0 = 6.6556e-04
Loss = 1.4436e-01, PNorm = 53.9463, GNorm = 1.1312, lr_0 = 6.6510e-04
Loss = 1.1119e-01, PNorm = 53.9638, GNorm = 0.7538, lr_0 = 6.6464e-04
Loss = 1.3398e-01, PNorm = 53.9792, GNorm = 1.2658, lr_0 = 6.6419e-04
Loss = 1.3401e-01, PNorm = 54.0003, GNorm = 1.2517, lr_0 = 6.6373e-04
Loss = 1.3312e-01, PNorm = 54.0222, GNorm = 1.1243, lr_0 = 6.6328e-04
Loss = 1.4014e-01, PNorm = 54.0445, GNorm = 0.7322, lr_0 = 6.6282e-04
Validation mae = 0.445798
Epoch 7
Loss = 1.4084e-01, PNorm = 54.0651, GNorm = 0.5889, lr_0 = 6.6237e-04
Loss = 1.0846e-01, PNorm = 54.0846, GNorm = 0.8298, lr_0 = 6.6192e-04
Loss = 1.1120e-01, PNorm = 54.0992, GNorm = 0.5701, lr_0 = 6.6146e-04
Loss = 1.3135e-01, PNorm = 54.1158, GNorm = 1.4240, lr_0 = 6.6101e-04
Loss = 1.1322e-01, PNorm = 54.1297, GNorm = 1.1881, lr_0 = 6.6056e-04
Loss = 9.6467e-02, PNorm = 54.1451, GNorm = 0.5895, lr_0 = 6.6011e-04
Loss = 1.2477e-01, PNorm = 54.1607, GNorm = 1.5950, lr_0 = 6.5965e-04
Loss = 1.1692e-01, PNorm = 54.1755, GNorm = 1.1237, lr_0 = 6.5920e-04
Loss = 1.2505e-01, PNorm = 54.1912, GNorm = 0.5801, lr_0 = 6.5875e-04
Loss = 1.0691e-01, PNorm = 54.2078, GNorm = 0.5730, lr_0 = 6.5830e-04
Loss = 1.2183e-01, PNorm = 54.2204, GNorm = 0.9415, lr_0 = 6.5785e-04
Loss = 1.1862e-01, PNorm = 54.2372, GNorm = 0.9470, lr_0 = 6.5740e-04
Loss = 1.0321e-01, PNorm = 54.2535, GNorm = 1.1092, lr_0 = 6.5695e-04
Loss = 1.2726e-01, PNorm = 54.2705, GNorm = 0.9072, lr_0 = 6.5650e-04
Loss = 1.2442e-01, PNorm = 54.2854, GNorm = 1.1287, lr_0 = 6.5605e-04
Loss = 1.0431e-01, PNorm = 54.3024, GNorm = 0.6238, lr_0 = 6.5560e-04
Loss = 1.1696e-01, PNorm = 54.3164, GNorm = 1.3434, lr_0 = 6.5515e-04
Loss = 9.7448e-02, PNorm = 54.3283, GNorm = 0.5074, lr_0 = 6.5470e-04
Loss = 9.6897e-02, PNorm = 54.3427, GNorm = 0.6623, lr_0 = 6.5425e-04
Loss = 1.1206e-01, PNorm = 54.3558, GNorm = 0.7514, lr_0 = 6.5380e-04
Loss = 1.0866e-01, PNorm = 54.3691, GNorm = 1.3514, lr_0 = 6.5335e-04
Loss = 1.1929e-01, PNorm = 54.3900, GNorm = 1.0797, lr_0 = 6.5291e-04
Loss = 1.3422e-01, PNorm = 54.4102, GNorm = 0.7835, lr_0 = 6.5246e-04
Loss = 1.0916e-01, PNorm = 54.4259, GNorm = 1.3128, lr_0 = 6.5201e-04
Loss = 1.2567e-01, PNorm = 54.4422, GNorm = 0.7883, lr_0 = 6.5157e-04
Loss = 9.3397e-02, PNorm = 54.4576, GNorm = 0.6440, lr_0 = 6.5112e-04
Loss = 1.0670e-01, PNorm = 54.4689, GNorm = 1.5513, lr_0 = 6.5067e-04
Loss = 1.3511e-01, PNorm = 54.4812, GNorm = 0.5878, lr_0 = 6.5023e-04
Loss = 1.1177e-01, PNorm = 54.4980, GNorm = 0.7143, lr_0 = 6.4978e-04
Loss = 1.0218e-01, PNorm = 54.5122, GNorm = 0.5516, lr_0 = 6.4934e-04
Loss = 1.2290e-01, PNorm = 54.5238, GNorm = 1.5879, lr_0 = 6.4889e-04
Loss = 1.0980e-01, PNorm = 54.5381, GNorm = 1.4081, lr_0 = 6.4845e-04
Loss = 1.0286e-01, PNorm = 54.5508, GNorm = 0.6830, lr_0 = 6.4800e-04
Loss = 1.2355e-01, PNorm = 54.5666, GNorm = 1.9553, lr_0 = 6.4756e-04
Loss = 1.3066e-01, PNorm = 54.5841, GNorm = 1.5433, lr_0 = 6.4712e-04
Loss = 1.2171e-01, PNorm = 54.6050, GNorm = 1.1804, lr_0 = 6.4667e-04
Loss = 1.2563e-01, PNorm = 54.6210, GNorm = 0.9329, lr_0 = 6.4623e-04
Loss = 1.3134e-01, PNorm = 54.6359, GNorm = 0.5544, lr_0 = 6.4579e-04
Loss = 1.1497e-01, PNorm = 54.6550, GNorm = 0.7035, lr_0 = 6.4534e-04
Loss = 1.0817e-01, PNorm = 54.6703, GNorm = 0.9538, lr_0 = 6.4490e-04
Loss = 1.0432e-01, PNorm = 54.6842, GNorm = 0.9503, lr_0 = 6.4446e-04
Loss = 1.2096e-01, PNorm = 54.7005, GNorm = 1.0566, lr_0 = 6.4402e-04
Loss = 1.2923e-01, PNorm = 54.7198, GNorm = 1.0178, lr_0 = 6.4358e-04
Loss = 1.2443e-01, PNorm = 54.7325, GNorm = 1.0806, lr_0 = 6.4314e-04
Loss = 1.2645e-01, PNorm = 54.7508, GNorm = 1.1012, lr_0 = 6.4270e-04
Loss = 1.2031e-01, PNorm = 54.7712, GNorm = 0.7295, lr_0 = 6.4226e-04
Loss = 1.3141e-01, PNorm = 54.7877, GNorm = 0.4998, lr_0 = 6.4182e-04
Loss = 1.1585e-01, PNorm = 54.8088, GNorm = 1.4413, lr_0 = 6.4138e-04
Loss = 1.1979e-01, PNorm = 54.8309, GNorm = 1.7682, lr_0 = 6.4094e-04
Loss = 1.1736e-01, PNorm = 54.8470, GNorm = 0.9862, lr_0 = 6.4050e-04
Loss = 1.1192e-01, PNorm = 54.8585, GNorm = 1.4628, lr_0 = 6.4006e-04
Loss = 1.1323e-01, PNorm = 54.8684, GNorm = 1.2269, lr_0 = 6.3962e-04
Loss = 1.2232e-01, PNorm = 54.8830, GNorm = 1.9391, lr_0 = 6.3918e-04
Loss = 1.4449e-01, PNorm = 54.9006, GNorm = 1.2372, lr_0 = 6.3874e-04
Loss = 1.3195e-01, PNorm = 54.9173, GNorm = 0.7696, lr_0 = 6.3831e-04
Loss = 1.3210e-01, PNorm = 54.9367, GNorm = 1.2864, lr_0 = 6.3787e-04
Loss = 1.3750e-01, PNorm = 54.9544, GNorm = 0.8076, lr_0 = 6.3743e-04
Loss = 1.2176e-01, PNorm = 54.9738, GNorm = 0.6066, lr_0 = 6.3700e-04
Loss = 1.1278e-01, PNorm = 54.9943, GNorm = 0.7917, lr_0 = 6.3656e-04
Loss = 1.2657e-01, PNorm = 55.0139, GNorm = 0.8102, lr_0 = 6.3612e-04
Loss = 1.1531e-01, PNorm = 55.0302, GNorm = 0.8336, lr_0 = 6.3569e-04
Loss = 1.3448e-01, PNorm = 55.0456, GNorm = 0.7848, lr_0 = 6.3525e-04
Loss = 9.7940e-02, PNorm = 55.0656, GNorm = 0.4751, lr_0 = 6.3482e-04
Loss = 1.0867e-01, PNorm = 55.0781, GNorm = 0.7352, lr_0 = 6.3438e-04
Loss = 1.2258e-01, PNorm = 55.0939, GNorm = 1.5331, lr_0 = 6.3395e-04
Loss = 1.0148e-01, PNorm = 55.1056, GNorm = 0.7714, lr_0 = 6.3351e-04
Loss = 1.0619e-01, PNorm = 55.1173, GNorm = 1.3280, lr_0 = 6.3308e-04
Loss = 1.4687e-01, PNorm = 55.1357, GNorm = 1.1478, lr_0 = 6.3265e-04
Loss = 1.1010e-01, PNorm = 55.1548, GNorm = 0.6026, lr_0 = 6.3221e-04
Loss = 1.1971e-01, PNorm = 55.1688, GNorm = 0.5771, lr_0 = 6.3178e-04
Loss = 1.2026e-01, PNorm = 55.1804, GNorm = 1.9470, lr_0 = 6.3135e-04
Loss = 1.1882e-01, PNorm = 55.1953, GNorm = 0.5690, lr_0 = 6.3091e-04
Loss = 1.1057e-01, PNorm = 55.2138, GNorm = 0.8683, lr_0 = 6.3048e-04
Loss = 1.2943e-01, PNorm = 55.2296, GNorm = 1.0882, lr_0 = 6.3005e-04
Loss = 1.2140e-01, PNorm = 55.2418, GNorm = 0.6988, lr_0 = 6.2962e-04
Loss = 1.4532e-01, PNorm = 55.2605, GNorm = 1.2292, lr_0 = 6.2919e-04
Loss = 1.3543e-01, PNorm = 55.2848, GNorm = 1.8657, lr_0 = 6.2876e-04
Loss = 1.2468e-01, PNorm = 55.2996, GNorm = 0.5607, lr_0 = 6.2833e-04
Loss = 1.0920e-01, PNorm = 55.3142, GNorm = 0.8075, lr_0 = 6.2789e-04
Loss = 1.1767e-01, PNorm = 55.3230, GNorm = 0.5523, lr_0 = 6.2746e-04
Loss = 1.1462e-01, PNorm = 55.3327, GNorm = 0.6868, lr_0 = 6.2703e-04
Loss = 1.1265e-01, PNorm = 55.3469, GNorm = 0.6686, lr_0 = 6.2661e-04
Loss = 1.1752e-01, PNorm = 55.3655, GNorm = 0.8786, lr_0 = 6.2618e-04
Loss = 1.1866e-01, PNorm = 55.3833, GNorm = 0.7320, lr_0 = 6.2575e-04
Loss = 1.4204e-01, PNorm = 55.4059, GNorm = 0.9244, lr_0 = 6.2532e-04
Loss = 1.2504e-01, PNorm = 55.4297, GNorm = 0.9343, lr_0 = 6.2489e-04
Loss = 1.0134e-01, PNorm = 55.4479, GNorm = 0.6093, lr_0 = 6.2446e-04
Loss = 1.2431e-01, PNorm = 55.4563, GNorm = 1.1068, lr_0 = 6.2403e-04
Loss = 1.2080e-01, PNorm = 55.4693, GNorm = 1.3868, lr_0 = 6.2361e-04
Loss = 1.3306e-01, PNorm = 55.4851, GNorm = 1.8172, lr_0 = 6.2318e-04
Loss = 1.1174e-01, PNorm = 55.4989, GNorm = 0.6126, lr_0 = 6.2275e-04
Loss = 1.2367e-01, PNorm = 55.5128, GNorm = 1.3623, lr_0 = 6.2233e-04
Loss = 1.0296e-01, PNorm = 55.5273, GNorm = 0.6835, lr_0 = 6.2190e-04
Loss = 1.1124e-01, PNorm = 55.5386, GNorm = 0.8469, lr_0 = 6.2147e-04
Loss = 1.2240e-01, PNorm = 55.5529, GNorm = 0.7459, lr_0 = 6.2105e-04
Loss = 1.1404e-01, PNorm = 55.5652, GNorm = 0.6219, lr_0 = 6.2062e-04
Loss = 1.3982e-01, PNorm = 55.5854, GNorm = 0.9038, lr_0 = 6.2020e-04
Loss = 1.2858e-01, PNorm = 55.6054, GNorm = 1.0664, lr_0 = 6.1977e-04
Loss = 1.1804e-01, PNorm = 55.6227, GNorm = 1.1793, lr_0 = 6.1935e-04
Loss = 1.1877e-01, PNorm = 55.6339, GNorm = 1.2765, lr_0 = 6.1892e-04
Loss = 1.1644e-01, PNorm = 55.6453, GNorm = 0.6757, lr_0 = 6.1850e-04
Loss = 1.1527e-01, PNorm = 55.6541, GNorm = 0.8447, lr_0 = 6.1808e-04
Loss = 1.1491e-01, PNorm = 55.6696, GNorm = 0.9757, lr_0 = 6.1765e-04
Loss = 1.1861e-01, PNorm = 55.6904, GNorm = 0.6561, lr_0 = 6.1723e-04
Loss = 1.2127e-01, PNorm = 55.7083, GNorm = 0.7942, lr_0 = 6.1681e-04
Loss = 1.0726e-01, PNorm = 55.7246, GNorm = 0.7157, lr_0 = 6.1638e-04
Loss = 1.1907e-01, PNorm = 55.7385, GNorm = 0.7620, lr_0 = 6.1596e-04
Loss = 1.2071e-01, PNorm = 55.7568, GNorm = 1.2825, lr_0 = 6.1554e-04
Loss = 1.1280e-01, PNorm = 55.7681, GNorm = 0.6805, lr_0 = 6.1512e-04
Loss = 1.0758e-01, PNorm = 55.7832, GNorm = 0.9787, lr_0 = 6.1470e-04
Loss = 1.2510e-01, PNorm = 55.7938, GNorm = 1.2160, lr_0 = 6.1428e-04
Loss = 1.3040e-01, PNorm = 55.8058, GNorm = 0.8478, lr_0 = 6.1385e-04
Loss = 1.2075e-01, PNorm = 55.8179, GNorm = 0.8678, lr_0 = 6.1343e-04
Loss = 1.2188e-01, PNorm = 55.8270, GNorm = 1.0781, lr_0 = 6.1301e-04
Loss = 1.3057e-01, PNorm = 55.8417, GNorm = 1.0473, lr_0 = 6.1259e-04
Loss = 1.1165e-01, PNorm = 55.8583, GNorm = 0.5030, lr_0 = 6.1217e-04
Loss = 1.1440e-01, PNorm = 55.8769, GNorm = 0.8645, lr_0 = 6.1175e-04
Loss = 1.0915e-01, PNorm = 55.8929, GNorm = 0.6778, lr_0 = 6.1134e-04
Loss = 1.1503e-01, PNorm = 55.9041, GNorm = 0.7076, lr_0 = 6.1092e-04
Loss = 1.1386e-01, PNorm = 55.9115, GNorm = 0.5941, lr_0 = 6.1050e-04
Validation mae = 0.411970
Epoch 8
Loss = 1.1362e-01, PNorm = 55.9205, GNorm = 0.5808, lr_0 = 6.1008e-04
Loss = 9.7290e-02, PNorm = 55.9357, GNorm = 1.0155, lr_0 = 6.0966e-04
Loss = 1.1602e-01, PNorm = 55.9541, GNorm = 0.6095, lr_0 = 6.0924e-04
Loss = 9.6606e-02, PNorm = 55.9666, GNorm = 0.8426, lr_0 = 6.0883e-04
Loss = 9.8598e-02, PNorm = 55.9772, GNorm = 1.1043, lr_0 = 6.0841e-04
Loss = 1.0172e-01, PNorm = 55.9934, GNorm = 0.5843, lr_0 = 6.0799e-04
Loss = 1.0795e-01, PNorm = 56.0095, GNorm = 0.7555, lr_0 = 6.0758e-04
Loss = 1.1336e-01, PNorm = 56.0244, GNorm = 0.9192, lr_0 = 6.0716e-04
Loss = 8.4930e-02, PNorm = 56.0406, GNorm = 0.8650, lr_0 = 6.0674e-04
Loss = 1.0220e-01, PNorm = 56.0610, GNorm = 0.9916, lr_0 = 6.0633e-04
Loss = 9.7532e-02, PNorm = 56.0736, GNorm = 0.5687, lr_0 = 6.0591e-04
Loss = 9.2499e-02, PNorm = 56.0842, GNorm = 0.6918, lr_0 = 6.0550e-04
Loss = 1.0519e-01, PNorm = 56.1013, GNorm = 0.8290, lr_0 = 6.0508e-04
Loss = 1.1727e-01, PNorm = 56.1159, GNorm = 1.7501, lr_0 = 6.0467e-04
Loss = 9.6601e-02, PNorm = 56.1306, GNorm = 0.6066, lr_0 = 6.0425e-04
Loss = 1.0179e-01, PNorm = 56.1445, GNorm = 0.7562, lr_0 = 6.0384e-04
Loss = 1.2306e-01, PNorm = 56.1567, GNorm = 0.5760, lr_0 = 6.0343e-04
Loss = 9.9959e-02, PNorm = 56.1703, GNorm = 1.0988, lr_0 = 6.0301e-04
Loss = 1.0501e-01, PNorm = 56.1831, GNorm = 0.5553, lr_0 = 6.0260e-04
Loss = 1.0366e-01, PNorm = 56.1930, GNorm = 0.6424, lr_0 = 6.0219e-04
Loss = 1.1615e-01, PNorm = 56.2069, GNorm = 0.8430, lr_0 = 6.0178e-04
Loss = 1.0328e-01, PNorm = 56.2201, GNorm = 1.7509, lr_0 = 6.0136e-04
Loss = 1.2285e-01, PNorm = 56.2365, GNorm = 0.7165, lr_0 = 6.0095e-04
Loss = 1.2013e-01, PNorm = 56.2563, GNorm = 0.5887, lr_0 = 6.0054e-04
Loss = 1.2190e-01, PNorm = 56.2769, GNorm = 0.6349, lr_0 = 6.0013e-04
Loss = 1.0767e-01, PNorm = 56.2981, GNorm = 0.6824, lr_0 = 5.9972e-04
Loss = 1.0466e-01, PNorm = 56.3113, GNorm = 0.6748, lr_0 = 5.9931e-04
Loss = 1.0922e-01, PNorm = 56.3228, GNorm = 0.5488, lr_0 = 5.9890e-04
Loss = 1.1312e-01, PNorm = 56.3324, GNorm = 0.7720, lr_0 = 5.9849e-04
Loss = 1.2082e-01, PNorm = 56.3478, GNorm = 0.8651, lr_0 = 5.9808e-04
Loss = 1.0379e-01, PNorm = 56.3642, GNorm = 0.6883, lr_0 = 5.9767e-04
Loss = 1.0896e-01, PNorm = 56.3842, GNorm = 0.8537, lr_0 = 5.9726e-04
Loss = 1.2262e-01, PNorm = 56.4032, GNorm = 1.3288, lr_0 = 5.9685e-04
Loss = 1.0058e-01, PNorm = 56.4159, GNorm = 0.8143, lr_0 = 5.9644e-04
Loss = 1.2739e-01, PNorm = 56.4249, GNorm = 0.9337, lr_0 = 5.9603e-04
Loss = 1.2640e-01, PNorm = 56.4450, GNorm = 0.6007, lr_0 = 5.9562e-04
Loss = 1.1909e-01, PNorm = 56.4630, GNorm = 0.6100, lr_0 = 5.9521e-04
Loss = 1.0032e-01, PNorm = 56.4815, GNorm = 0.5346, lr_0 = 5.9481e-04
Loss = 1.1963e-01, PNorm = 56.5060, GNorm = 0.6239, lr_0 = 5.9440e-04
Loss = 1.1826e-01, PNorm = 56.5237, GNorm = 0.7177, lr_0 = 5.9399e-04
Loss = 1.1050e-01, PNorm = 56.5414, GNorm = 0.4978, lr_0 = 5.9358e-04
Loss = 1.0521e-01, PNorm = 56.5532, GNorm = 0.6142, lr_0 = 5.9318e-04
Loss = 9.8457e-02, PNorm = 56.5626, GNorm = 0.9405, lr_0 = 5.9277e-04
Loss = 1.1368e-01, PNorm = 56.5711, GNorm = 1.5935, lr_0 = 5.9236e-04
Loss = 9.2056e-02, PNorm = 56.5849, GNorm = 0.6937, lr_0 = 5.9196e-04
Loss = 1.0772e-01, PNorm = 56.5974, GNorm = 0.6277, lr_0 = 5.9155e-04
Loss = 1.2564e-01, PNorm = 56.6078, GNorm = 0.6956, lr_0 = 5.9115e-04
Loss = 1.1553e-01, PNorm = 56.6239, GNorm = 0.9991, lr_0 = 5.9074e-04
Loss = 1.2889e-01, PNorm = 56.6446, GNorm = 0.9318, lr_0 = 5.9034e-04
Loss = 1.3041e-01, PNorm = 56.6645, GNorm = 1.2223, lr_0 = 5.8993e-04
Loss = 1.5672e-01, PNorm = 56.6874, GNorm = 1.9384, lr_0 = 5.8953e-04
Loss = 1.1849e-01, PNorm = 56.7130, GNorm = 1.7767, lr_0 = 5.8913e-04
Loss = 1.1037e-01, PNorm = 56.7374, GNorm = 1.1082, lr_0 = 5.8872e-04
Loss = 1.0804e-01, PNorm = 56.7539, GNorm = 0.8891, lr_0 = 5.8832e-04
Loss = 1.1706e-01, PNorm = 56.7688, GNorm = 0.8509, lr_0 = 5.8792e-04
Loss = 1.1069e-01, PNorm = 56.7837, GNorm = 0.6096, lr_0 = 5.8751e-04
Loss = 1.0595e-01, PNorm = 56.7998, GNorm = 1.3981, lr_0 = 5.8711e-04
Loss = 8.9368e-02, PNorm = 56.8124, GNorm = 0.6510, lr_0 = 5.8671e-04
Loss = 1.1470e-01, PNorm = 56.8242, GNorm = 0.8424, lr_0 = 5.8631e-04
Loss = 9.9494e-02, PNorm = 56.8348, GNorm = 0.5680, lr_0 = 5.8591e-04
Loss = 1.0118e-01, PNorm = 56.8443, GNorm = 1.1197, lr_0 = 5.8550e-04
Loss = 9.4360e-02, PNorm = 56.8589, GNorm = 0.5733, lr_0 = 5.8510e-04
Loss = 1.0766e-01, PNorm = 56.8773, GNorm = 0.8292, lr_0 = 5.8470e-04
Loss = 1.2527e-01, PNorm = 56.8916, GNorm = 1.4003, lr_0 = 5.8430e-04
Loss = 1.1897e-01, PNorm = 56.9060, GNorm = 0.8322, lr_0 = 5.8390e-04
Loss = 1.1200e-01, PNorm = 56.9250, GNorm = 0.5559, lr_0 = 5.8350e-04
Loss = 1.0633e-01, PNorm = 56.9406, GNorm = 0.7169, lr_0 = 5.8310e-04
Loss = 1.0106e-01, PNorm = 56.9497, GNorm = 0.6407, lr_0 = 5.8270e-04
Loss = 1.1463e-01, PNorm = 56.9588, GNorm = 1.2984, lr_0 = 5.8230e-04
Loss = 1.1361e-01, PNorm = 56.9730, GNorm = 1.0150, lr_0 = 5.8190e-04
Loss = 1.0578e-01, PNorm = 56.9873, GNorm = 0.7953, lr_0 = 5.8151e-04
Loss = 1.1513e-01, PNorm = 57.0008, GNorm = 0.5333, lr_0 = 5.8111e-04
Loss = 1.0698e-01, PNorm = 57.0162, GNorm = 1.6377, lr_0 = 5.8071e-04
Loss = 1.0696e-01, PNorm = 57.0342, GNorm = 1.8484, lr_0 = 5.8031e-04
Loss = 1.2871e-01, PNorm = 57.0506, GNorm = 1.2419, lr_0 = 5.7991e-04
Loss = 1.0119e-01, PNorm = 57.0611, GNorm = 0.8771, lr_0 = 5.7952e-04
Loss = 1.2847e-01, PNorm = 57.0759, GNorm = 1.4911, lr_0 = 5.7912e-04
Loss = 1.1162e-01, PNorm = 57.0928, GNorm = 0.7733, lr_0 = 5.7872e-04
Loss = 1.1357e-01, PNorm = 57.1079, GNorm = 0.7829, lr_0 = 5.7833e-04
Loss = 1.0769e-01, PNorm = 57.1219, GNorm = 0.8268, lr_0 = 5.7793e-04
Loss = 9.7310e-02, PNorm = 57.1348, GNorm = 1.1374, lr_0 = 5.7753e-04
Loss = 1.0565e-01, PNorm = 57.1463, GNorm = 0.8041, lr_0 = 5.7714e-04
Loss = 1.2095e-01, PNorm = 57.1618, GNorm = 0.4635, lr_0 = 5.7674e-04
Loss = 9.9712e-02, PNorm = 57.1763, GNorm = 1.1679, lr_0 = 5.7635e-04
Loss = 1.0984e-01, PNorm = 57.1922, GNorm = 1.1664, lr_0 = 5.7595e-04
Loss = 1.0015e-01, PNorm = 57.2085, GNorm = 0.8134, lr_0 = 5.7556e-04
Loss = 1.1165e-01, PNorm = 57.2166, GNorm = 0.8472, lr_0 = 5.7516e-04
Loss = 1.0371e-01, PNorm = 57.2261, GNorm = 0.6754, lr_0 = 5.7477e-04
Loss = 1.0823e-01, PNorm = 57.2365, GNorm = 0.7239, lr_0 = 5.7438e-04
Loss = 1.0141e-01, PNorm = 57.2505, GNorm = 1.3507, lr_0 = 5.7398e-04
Loss = 1.2019e-01, PNorm = 57.2653, GNorm = 1.0630, lr_0 = 5.7359e-04
Loss = 1.0799e-01, PNorm = 57.2784, GNorm = 0.8224, lr_0 = 5.7320e-04
Loss = 9.9815e-02, PNorm = 57.2938, GNorm = 0.7955, lr_0 = 5.7280e-04
Loss = 1.1622e-01, PNorm = 57.3071, GNorm = 0.6336, lr_0 = 5.7241e-04
Loss = 1.0157e-01, PNorm = 57.3173, GNorm = 0.5756, lr_0 = 5.7202e-04
Loss = 1.1415e-01, PNorm = 57.3361, GNorm = 0.4992, lr_0 = 5.7163e-04
Loss = 1.2030e-01, PNorm = 57.3551, GNorm = 0.7048, lr_0 = 5.7124e-04
Loss = 1.1626e-01, PNorm = 57.3662, GNorm = 1.1580, lr_0 = 5.7084e-04
Loss = 1.2042e-01, PNorm = 57.3794, GNorm = 0.6912, lr_0 = 5.7045e-04
Loss = 1.0464e-01, PNorm = 57.3923, GNorm = 0.7917, lr_0 = 5.7006e-04
Loss = 1.1410e-01, PNorm = 57.4032, GNorm = 1.3248, lr_0 = 5.6967e-04
Loss = 1.0100e-01, PNorm = 57.4152, GNorm = 0.8606, lr_0 = 5.6928e-04
Loss = 1.1567e-01, PNorm = 57.4300, GNorm = 0.7086, lr_0 = 5.6889e-04
Loss = 1.1370e-01, PNorm = 57.4489, GNorm = 0.6729, lr_0 = 5.6850e-04
Loss = 1.0995e-01, PNorm = 57.4661, GNorm = 0.8398, lr_0 = 5.6811e-04
Loss = 1.0255e-01, PNorm = 57.4807, GNorm = 1.7978, lr_0 = 5.6772e-04
Loss = 1.0987e-01, PNorm = 57.4919, GNorm = 0.5575, lr_0 = 5.6733e-04
Loss = 1.0561e-01, PNorm = 57.5054, GNorm = 0.9893, lr_0 = 5.6695e-04
Loss = 9.8295e-02, PNorm = 57.5186, GNorm = 0.9117, lr_0 = 5.6656e-04
Loss = 1.0191e-01, PNorm = 57.5307, GNorm = 0.6005, lr_0 = 5.6617e-04
Loss = 1.1651e-01, PNorm = 57.5447, GNorm = 0.6006, lr_0 = 5.6578e-04
Loss = 1.1757e-01, PNorm = 57.5598, GNorm = 1.0375, lr_0 = 5.6539e-04
Loss = 1.2630e-01, PNorm = 57.5758, GNorm = 1.1349, lr_0 = 5.6501e-04
Loss = 1.0058e-01, PNorm = 57.5878, GNorm = 1.2194, lr_0 = 5.6462e-04
Loss = 1.1388e-01, PNorm = 57.5963, GNorm = 1.1174, lr_0 = 5.6423e-04
Loss = 1.2972e-01, PNorm = 57.6132, GNorm = 0.8064, lr_0 = 5.6385e-04
Loss = 9.9101e-02, PNorm = 57.6300, GNorm = 0.8935, lr_0 = 5.6346e-04
Loss = 1.1050e-01, PNorm = 57.6438, GNorm = 0.6731, lr_0 = 5.6307e-04
Loss = 1.1046e-01, PNorm = 57.6573, GNorm = 1.0780, lr_0 = 5.6269e-04
Loss = 1.1292e-01, PNorm = 57.6704, GNorm = 0.8477, lr_0 = 5.6230e-04
Validation mae = 0.412784
Epoch 9
Loss = 1.1219e-01, PNorm = 57.6855, GNorm = 0.6376, lr_0 = 5.6192e-04
Loss = 1.0656e-01, PNorm = 57.6942, GNorm = 0.8975, lr_0 = 5.6153e-04
Loss = 8.3550e-02, PNorm = 57.7080, GNorm = 0.5565, lr_0 = 5.6115e-04
Loss = 9.4198e-02, PNorm = 57.7210, GNorm = 0.5272, lr_0 = 5.6076e-04
Loss = 9.9292e-02, PNorm = 57.7321, GNorm = 0.4603, lr_0 = 5.6038e-04
Loss = 8.9442e-02, PNorm = 57.7438, GNorm = 1.0742, lr_0 = 5.6000e-04
Loss = 8.0035e-02, PNorm = 57.7591, GNorm = 0.5512, lr_0 = 5.5961e-04
Loss = 9.6306e-02, PNorm = 57.7735, GNorm = 0.9948, lr_0 = 5.5923e-04
Loss = 1.0131e-01, PNorm = 57.7910, GNorm = 1.4337, lr_0 = 5.5885e-04
Loss = 1.0606e-01, PNorm = 57.8053, GNorm = 1.1750, lr_0 = 5.5846e-04
Loss = 1.1174e-01, PNorm = 57.8194, GNorm = 0.6605, lr_0 = 5.5808e-04
Loss = 9.3837e-02, PNorm = 57.8361, GNorm = 0.7293, lr_0 = 5.5770e-04
Loss = 1.1969e-01, PNorm = 57.8540, GNorm = 0.6717, lr_0 = 5.5732e-04
Loss = 9.3534e-02, PNorm = 57.8712, GNorm = 0.7042, lr_0 = 5.5693e-04
Loss = 1.0704e-01, PNorm = 57.8870, GNorm = 0.6577, lr_0 = 5.5655e-04
Loss = 1.1748e-01, PNorm = 57.9053, GNorm = 1.0228, lr_0 = 5.5617e-04
Loss = 1.1125e-01, PNorm = 57.9225, GNorm = 0.7426, lr_0 = 5.5579e-04
Loss = 1.0813e-01, PNorm = 57.9389, GNorm = 0.8321, lr_0 = 5.5541e-04
Loss = 9.3762e-02, PNorm = 57.9551, GNorm = 0.9134, lr_0 = 5.5503e-04
Loss = 1.0188e-01, PNorm = 57.9692, GNorm = 0.9141, lr_0 = 5.5465e-04
Loss = 1.0047e-01, PNorm = 57.9796, GNorm = 0.6491, lr_0 = 5.5427e-04
Loss = 9.2446e-02, PNorm = 57.9870, GNorm = 0.7994, lr_0 = 5.5389e-04
Loss = 1.0093e-01, PNorm = 57.9996, GNorm = 0.6420, lr_0 = 5.5351e-04
Loss = 9.0892e-02, PNorm = 58.0121, GNorm = 0.5792, lr_0 = 5.5313e-04
Loss = 8.5404e-02, PNorm = 58.0264, GNorm = 1.2402, lr_0 = 5.5275e-04
Loss = 9.9562e-02, PNorm = 58.0453, GNorm = 0.6581, lr_0 = 5.5237e-04
Loss = 9.6149e-02, PNorm = 58.0602, GNorm = 0.7901, lr_0 = 5.5199e-04
Loss = 9.8005e-02, PNorm = 58.0780, GNorm = 0.6394, lr_0 = 5.5162e-04
Loss = 8.8079e-02, PNorm = 58.0924, GNorm = 0.8324, lr_0 = 5.5124e-04
Loss = 1.1495e-01, PNorm = 58.1082, GNorm = 0.5618, lr_0 = 5.5086e-04
Loss = 1.0020e-01, PNorm = 58.1242, GNorm = 0.5821, lr_0 = 5.5048e-04
Loss = 1.0383e-01, PNorm = 58.1369, GNorm = 0.6550, lr_0 = 5.5011e-04
Loss = 1.0412e-01, PNorm = 58.1470, GNorm = 0.7591, lr_0 = 5.4973e-04
Loss = 9.1837e-02, PNorm = 58.1561, GNorm = 0.5292, lr_0 = 5.4935e-04
Loss = 1.1310e-01, PNorm = 58.1648, GNorm = 0.7241, lr_0 = 5.4898e-04
Loss = 1.0455e-01, PNorm = 58.1768, GNorm = 1.2023, lr_0 = 5.4860e-04
Loss = 8.7181e-02, PNorm = 58.1865, GNorm = 1.6211, lr_0 = 5.4822e-04
Loss = 9.9650e-02, PNorm = 58.1987, GNorm = 1.2126, lr_0 = 5.4785e-04
Loss = 1.1181e-01, PNorm = 58.2154, GNorm = 0.5888, lr_0 = 5.4747e-04
Loss = 1.1223e-01, PNorm = 58.2320, GNorm = 0.8264, lr_0 = 5.4710e-04
Loss = 1.1234e-01, PNorm = 58.2466, GNorm = 0.5900, lr_0 = 5.4672e-04
Loss = 1.0180e-01, PNorm = 58.2596, GNorm = 0.6472, lr_0 = 5.4635e-04
Loss = 1.0430e-01, PNorm = 58.2739, GNorm = 0.6638, lr_0 = 5.4597e-04
Loss = 1.0085e-01, PNorm = 58.2878, GNorm = 1.0230, lr_0 = 5.4560e-04
Loss = 9.8266e-02, PNorm = 58.3026, GNorm = 0.9195, lr_0 = 5.4523e-04
Loss = 1.0411e-01, PNorm = 58.3189, GNorm = 0.6401, lr_0 = 5.4485e-04
Loss = 9.2620e-02, PNorm = 58.3272, GNorm = 1.0872, lr_0 = 5.4448e-04
Loss = 1.0769e-01, PNorm = 58.3374, GNorm = 1.6342, lr_0 = 5.4411e-04
Loss = 1.1880e-01, PNorm = 58.3548, GNorm = 2.1473, lr_0 = 5.4373e-04
Loss = 1.1309e-01, PNorm = 58.3729, GNorm = 0.8348, lr_0 = 5.4336e-04
Loss = 1.0848e-01, PNorm = 58.3875, GNorm = 0.6740, lr_0 = 5.4299e-04
Loss = 9.6594e-02, PNorm = 58.4020, GNorm = 0.7973, lr_0 = 5.4262e-04
Loss = 1.1059e-01, PNorm = 58.4142, GNorm = 0.5692, lr_0 = 5.4225e-04
Loss = 1.0139e-01, PNorm = 58.4301, GNorm = 1.0397, lr_0 = 5.4187e-04
Loss = 1.0496e-01, PNorm = 58.4434, GNorm = 0.7104, lr_0 = 5.4150e-04
Loss = 1.1248e-01, PNorm = 58.4578, GNorm = 1.2709, lr_0 = 5.4113e-04
Loss = 1.1360e-01, PNorm = 58.4766, GNorm = 0.5311, lr_0 = 5.4076e-04
Loss = 1.0838e-01, PNorm = 58.4943, GNorm = 1.0375, lr_0 = 5.4039e-04
Loss = 9.4816e-02, PNorm = 58.5033, GNorm = 0.5823, lr_0 = 5.4002e-04
Loss = 9.1756e-02, PNorm = 58.5144, GNorm = 0.5905, lr_0 = 5.3965e-04
Loss = 1.0756e-01, PNorm = 58.5307, GNorm = 1.2775, lr_0 = 5.3928e-04
Loss = 9.2812e-02, PNorm = 58.5447, GNorm = 0.9968, lr_0 = 5.3891e-04
Loss = 8.6559e-02, PNorm = 58.5531, GNorm = 1.1060, lr_0 = 5.3854e-04
Loss = 1.0593e-01, PNorm = 58.5639, GNorm = 0.4668, lr_0 = 5.3817e-04
Loss = 9.1406e-02, PNorm = 58.5790, GNorm = 0.9947, lr_0 = 5.3781e-04
Loss = 9.9326e-02, PNorm = 58.5855, GNorm = 0.6622, lr_0 = 5.3744e-04
Loss = 1.0612e-01, PNorm = 58.5959, GNorm = 1.6514, lr_0 = 5.3707e-04
Loss = 1.0071e-01, PNorm = 58.6072, GNorm = 0.9043, lr_0 = 5.3670e-04
Loss = 1.0969e-01, PNorm = 58.6181, GNorm = 0.6328, lr_0 = 5.3633e-04
Loss = 9.4125e-02, PNorm = 58.6356, GNorm = 0.6566, lr_0 = 5.3597e-04
Loss = 9.1687e-02, PNorm = 58.6526, GNorm = 1.0722, lr_0 = 5.3560e-04
Loss = 1.0561e-01, PNorm = 58.6683, GNorm = 0.5465, lr_0 = 5.3523e-04
Loss = 1.1218e-01, PNorm = 58.6791, GNorm = 1.0275, lr_0 = 5.3486e-04
Loss = 9.4758e-02, PNorm = 58.6883, GNorm = 0.6506, lr_0 = 5.3450e-04
Loss = 1.1154e-01, PNorm = 58.7001, GNorm = 0.8301, lr_0 = 5.3413e-04
Loss = 1.1668e-01, PNorm = 58.7157, GNorm = 0.7176, lr_0 = 5.3377e-04
Loss = 1.0815e-01, PNorm = 58.7343, GNorm = 0.4950, lr_0 = 5.3340e-04
Loss = 1.0742e-01, PNorm = 58.7521, GNorm = 0.6724, lr_0 = 5.3304e-04
Loss = 1.2731e-01, PNorm = 58.7720, GNorm = 1.5559, lr_0 = 5.3267e-04
Loss = 1.0808e-01, PNorm = 58.7928, GNorm = 1.0262, lr_0 = 5.3231e-04
Loss = 1.1304e-01, PNorm = 58.8094, GNorm = 0.8893, lr_0 = 5.3194e-04
Loss = 9.8133e-02, PNorm = 58.8221, GNorm = 0.9903, lr_0 = 5.3158e-04
Loss = 1.0190e-01, PNorm = 58.8361, GNorm = 0.6748, lr_0 = 5.3121e-04
Loss = 9.3995e-02, PNorm = 58.8509, GNorm = 1.2369, lr_0 = 5.3085e-04
Loss = 9.8099e-02, PNorm = 58.8656, GNorm = 0.6259, lr_0 = 5.3048e-04
Loss = 1.0577e-01, PNorm = 58.8782, GNorm = 0.5174, lr_0 = 5.3012e-04
Loss = 1.2145e-01, PNorm = 58.8901, GNorm = 1.1200, lr_0 = 5.2976e-04
Loss = 9.5653e-02, PNorm = 58.9015, GNorm = 0.6524, lr_0 = 5.2939e-04
Loss = 9.3416e-02, PNorm = 58.9121, GNorm = 1.7351, lr_0 = 5.2903e-04
Loss = 1.1567e-01, PNorm = 58.9239, GNorm = 0.5996, lr_0 = 5.2867e-04
Loss = 1.0095e-01, PNorm = 58.9360, GNorm = 1.1556, lr_0 = 5.2831e-04
Loss = 9.7911e-02, PNorm = 58.9467, GNorm = 0.5222, lr_0 = 5.2795e-04
Loss = 1.1145e-01, PNorm = 58.9597, GNorm = 1.5765, lr_0 = 5.2758e-04
Loss = 1.0162e-01, PNorm = 58.9725, GNorm = 0.9611, lr_0 = 5.2722e-04
Loss = 1.0038e-01, PNorm = 58.9842, GNorm = 0.5640, lr_0 = 5.2686e-04
Loss = 1.1324e-01, PNorm = 58.9936, GNorm = 0.6564, lr_0 = 5.2650e-04
Loss = 1.0368e-01, PNorm = 59.0019, GNorm = 0.8050, lr_0 = 5.2614e-04
Loss = 1.0182e-01, PNorm = 59.0076, GNorm = 0.7941, lr_0 = 5.2578e-04
Loss = 8.6040e-02, PNorm = 59.0162, GNorm = 0.7574, lr_0 = 5.2542e-04
Loss = 1.0031e-01, PNorm = 59.0248, GNorm = 0.5945, lr_0 = 5.2506e-04
Loss = 1.2501e-01, PNorm = 59.0347, GNorm = 0.6773, lr_0 = 5.2470e-04
Loss = 1.0138e-01, PNorm = 59.0453, GNorm = 1.1047, lr_0 = 5.2434e-04
Loss = 1.0547e-01, PNorm = 59.0547, GNorm = 0.8217, lr_0 = 5.2398e-04
Loss = 1.0203e-01, PNorm = 59.0625, GNorm = 0.9065, lr_0 = 5.2362e-04
Loss = 9.2021e-02, PNorm = 59.0719, GNorm = 0.7041, lr_0 = 5.2326e-04
Loss = 8.8909e-02, PNorm = 59.0796, GNorm = 0.5902, lr_0 = 5.2290e-04
Loss = 1.0454e-01, PNorm = 59.0887, GNorm = 0.8976, lr_0 = 5.2255e-04
Loss = 1.0760e-01, PNorm = 59.0996, GNorm = 0.6234, lr_0 = 5.2219e-04
Loss = 1.1736e-01, PNorm = 59.1170, GNorm = 1.8075, lr_0 = 5.2183e-04
Loss = 1.1355e-01, PNorm = 59.1353, GNorm = 0.8310, lr_0 = 5.2147e-04
Loss = 1.0709e-01, PNorm = 59.1467, GNorm = 1.0796, lr_0 = 5.2112e-04
Loss = 9.5084e-02, PNorm = 59.1578, GNorm = 0.6404, lr_0 = 5.2076e-04
Loss = 9.0381e-02, PNorm = 59.1630, GNorm = 0.9912, lr_0 = 5.2040e-04
Loss = 1.0773e-01, PNorm = 59.1769, GNorm = 0.5863, lr_0 = 5.2005e-04
Loss = 1.0212e-01, PNorm = 59.1943, GNorm = 0.5233, lr_0 = 5.1969e-04
Loss = 1.0978e-01, PNorm = 59.2111, GNorm = 1.5337, lr_0 = 5.1933e-04
Loss = 1.1141e-01, PNorm = 59.2259, GNorm = 1.3354, lr_0 = 5.1898e-04
Loss = 1.1351e-01, PNorm = 59.2410, GNorm = 0.8620, lr_0 = 5.1862e-04
Loss = 1.0048e-01, PNorm = 59.2531, GNorm = 0.8552, lr_0 = 5.1827e-04
Loss = 9.7434e-02, PNorm = 59.2628, GNorm = 0.5926, lr_0 = 5.1791e-04
Validation mae = 0.413016
Epoch 10
Loss = 8.2581e-02, PNorm = 59.2744, GNorm = 0.6303, lr_0 = 5.1756e-04
Loss = 8.9173e-02, PNorm = 59.2870, GNorm = 0.9378, lr_0 = 5.1720e-04
Loss = 7.9627e-02, PNorm = 59.2963, GNorm = 0.7164, lr_0 = 5.1685e-04
Loss = 9.1759e-02, PNorm = 59.3038, GNorm = 0.6284, lr_0 = 5.1649e-04
Loss = 9.0564e-02, PNorm = 59.3126, GNorm = 0.9509, lr_0 = 5.1614e-04
Loss = 9.2897e-02, PNorm = 59.3187, GNorm = 1.3587, lr_0 = 5.1579e-04
Loss = 9.5810e-02, PNorm = 59.3295, GNorm = 1.0719, lr_0 = 5.1543e-04
Loss = 9.7943e-02, PNorm = 59.3444, GNorm = 0.5311, lr_0 = 5.1508e-04
Loss = 1.1212e-01, PNorm = 59.3625, GNorm = 0.8319, lr_0 = 5.1473e-04
Loss = 1.1264e-01, PNorm = 59.3826, GNorm = 1.5952, lr_0 = 5.1437e-04
Loss = 1.0457e-01, PNorm = 59.4004, GNorm = 1.4434, lr_0 = 5.1402e-04
Loss = 9.3136e-02, PNorm = 59.4163, GNorm = 0.4849, lr_0 = 5.1367e-04
Loss = 9.7531e-02, PNorm = 59.4300, GNorm = 0.9031, lr_0 = 5.1332e-04
Loss = 8.5953e-02, PNorm = 59.4417, GNorm = 0.8392, lr_0 = 5.1297e-04
Loss = 1.0562e-01, PNorm = 59.4513, GNorm = 0.8012, lr_0 = 5.1262e-04
Loss = 1.0401e-01, PNorm = 59.4625, GNorm = 0.8892, lr_0 = 5.1226e-04
Loss = 9.7805e-02, PNorm = 59.4832, GNorm = 1.1891, lr_0 = 5.1191e-04
Loss = 9.1301e-02, PNorm = 59.5052, GNorm = 1.0704, lr_0 = 5.1156e-04
Loss = 8.1878e-02, PNorm = 59.5199, GNorm = 1.4635, lr_0 = 5.1121e-04
Loss = 1.1328e-01, PNorm = 59.5332, GNorm = 0.7546, lr_0 = 5.1086e-04
Loss = 9.5820e-02, PNorm = 59.5485, GNorm = 0.7054, lr_0 = 5.1051e-04
Loss = 9.5945e-02, PNorm = 59.5623, GNorm = 0.4847, lr_0 = 5.1016e-04
Loss = 1.0065e-01, PNorm = 59.5741, GNorm = 0.6611, lr_0 = 5.0981e-04
Loss = 8.7489e-02, PNorm = 59.5798, GNorm = 0.6234, lr_0 = 5.0946e-04
Loss = 9.9926e-02, PNorm = 59.5922, GNorm = 0.8294, lr_0 = 5.0911e-04
Loss = 7.7555e-02, PNorm = 59.6062, GNorm = 0.4537, lr_0 = 5.0877e-04
Loss = 8.9301e-02, PNorm = 59.6150, GNorm = 0.4294, lr_0 = 5.0842e-04
Loss = 8.6695e-02, PNorm = 59.6226, GNorm = 0.7395, lr_0 = 5.0807e-04
Loss = 8.6007e-02, PNorm = 59.6342, GNorm = 0.7102, lr_0 = 5.0772e-04
Loss = 9.3664e-02, PNorm = 59.6506, GNorm = 0.7530, lr_0 = 5.0737e-04
Loss = 1.0148e-01, PNorm = 59.6623, GNorm = 0.7602, lr_0 = 5.0703e-04
Loss = 9.3838e-02, PNorm = 59.6705, GNorm = 0.7270, lr_0 = 5.0668e-04
Loss = 8.6351e-02, PNorm = 59.6822, GNorm = 0.4799, lr_0 = 5.0633e-04
Loss = 8.8678e-02, PNorm = 59.6929, GNorm = 0.6552, lr_0 = 5.0598e-04
Loss = 9.3483e-02, PNorm = 59.7001, GNorm = 0.5654, lr_0 = 5.0564e-04
Loss = 9.6025e-02, PNorm = 59.7101, GNorm = 0.6511, lr_0 = 5.0529e-04
Loss = 9.6403e-02, PNorm = 59.7240, GNorm = 0.6898, lr_0 = 5.0494e-04
Loss = 9.1743e-02, PNorm = 59.7391, GNorm = 0.4627, lr_0 = 5.0460e-04
Loss = 9.3474e-02, PNorm = 59.7545, GNorm = 0.6957, lr_0 = 5.0425e-04
Loss = 9.6328e-02, PNorm = 59.7674, GNorm = 0.7734, lr_0 = 5.0391e-04
Loss = 1.0098e-01, PNorm = 59.7833, GNorm = 0.8415, lr_0 = 5.0356e-04
Loss = 8.5558e-02, PNorm = 59.7963, GNorm = 0.4319, lr_0 = 5.0322e-04
Loss = 8.7904e-02, PNorm = 59.8046, GNorm = 0.5797, lr_0 = 5.0287e-04
Loss = 8.4276e-02, PNorm = 59.8140, GNorm = 1.0716, lr_0 = 5.0253e-04
Loss = 9.8342e-02, PNorm = 59.8231, GNorm = 0.8755, lr_0 = 5.0218e-04
Loss = 9.8346e-02, PNorm = 59.8394, GNorm = 1.4828, lr_0 = 5.0184e-04
Loss = 1.0320e-01, PNorm = 59.8532, GNorm = 0.6450, lr_0 = 5.0150e-04
Loss = 8.3605e-02, PNorm = 59.8629, GNorm = 0.6087, lr_0 = 5.0115e-04
Loss = 9.8810e-02, PNorm = 59.8734, GNorm = 0.6940, lr_0 = 5.0081e-04
Loss = 8.8380e-02, PNorm = 59.8874, GNorm = 0.5944, lr_0 = 5.0047e-04
Loss = 9.7387e-02, PNorm = 59.8972, GNorm = 0.5815, lr_0 = 5.0012e-04
Loss = 8.9280e-02, PNorm = 59.9094, GNorm = 0.5874, lr_0 = 4.9978e-04
Loss = 8.7842e-02, PNorm = 59.9186, GNorm = 0.7631, lr_0 = 4.9944e-04
Loss = 9.8765e-02, PNorm = 59.9334, GNorm = 0.6447, lr_0 = 4.9910e-04
Loss = 9.8417e-02, PNorm = 59.9480, GNorm = 0.4593, lr_0 = 4.9875e-04
Loss = 8.6478e-02, PNorm = 59.9575, GNorm = 0.4460, lr_0 = 4.9841e-04
Loss = 1.0480e-01, PNorm = 59.9709, GNorm = 0.7383, lr_0 = 4.9807e-04
Loss = 9.4254e-02, PNorm = 59.9885, GNorm = 0.8916, lr_0 = 4.9773e-04
Loss = 1.0368e-01, PNorm = 59.9991, GNorm = 1.3183, lr_0 = 4.9739e-04
Loss = 9.4972e-02, PNorm = 60.0107, GNorm = 0.9040, lr_0 = 4.9705e-04
Loss = 8.7616e-02, PNorm = 60.0227, GNorm = 0.5948, lr_0 = 4.9671e-04
Loss = 1.0051e-01, PNorm = 60.0320, GNorm = 0.5142, lr_0 = 4.9637e-04
Loss = 1.0634e-01, PNorm = 60.0401, GNorm = 0.5069, lr_0 = 4.9603e-04
Loss = 9.0586e-02, PNorm = 60.0467, GNorm = 0.4789, lr_0 = 4.9569e-04
Loss = 1.0317e-01, PNorm = 60.0552, GNorm = 0.6978, lr_0 = 4.9535e-04
Loss = 7.9872e-02, PNorm = 60.0661, GNorm = 0.5934, lr_0 = 4.9501e-04
Loss = 1.0416e-01, PNorm = 60.0782, GNorm = 0.6413, lr_0 = 4.9467e-04
Loss = 1.0989e-01, PNorm = 60.0898, GNorm = 1.1165, lr_0 = 4.9433e-04
Loss = 8.9149e-02, PNorm = 60.0986, GNorm = 0.6699, lr_0 = 4.9399e-04
Loss = 9.6349e-02, PNorm = 60.1107, GNorm = 0.7757, lr_0 = 4.9365e-04
Loss = 9.1216e-02, PNorm = 60.1235, GNorm = 0.7816, lr_0 = 4.9332e-04
Loss = 9.1756e-02, PNorm = 60.1332, GNorm = 0.5521, lr_0 = 4.9298e-04
Loss = 1.0248e-01, PNorm = 60.1425, GNorm = 0.9783, lr_0 = 4.9264e-04
Loss = 1.0478e-01, PNorm = 60.1566, GNorm = 0.5053, lr_0 = 4.9230e-04
Loss = 8.8729e-02, PNorm = 60.1706, GNorm = 0.6168, lr_0 = 4.9197e-04
Loss = 1.0154e-01, PNorm = 60.1812, GNorm = 0.9667, lr_0 = 4.9163e-04
Loss = 8.6887e-02, PNorm = 60.1932, GNorm = 1.1142, lr_0 = 4.9129e-04
Loss = 9.6246e-02, PNorm = 60.2055, GNorm = 0.5396, lr_0 = 4.9095e-04
Loss = 9.4920e-02, PNorm = 60.2150, GNorm = 0.5429, lr_0 = 4.9062e-04
Loss = 9.4965e-02, PNorm = 60.2207, GNorm = 0.4400, lr_0 = 4.9028e-04
Loss = 8.1669e-02, PNorm = 60.2307, GNorm = 0.5069, lr_0 = 4.8995e-04
Loss = 8.3577e-02, PNorm = 60.2398, GNorm = 0.5784, lr_0 = 4.8961e-04
Loss = 1.0144e-01, PNorm = 60.2473, GNorm = 0.5500, lr_0 = 4.8928e-04
Loss = 9.3878e-02, PNorm = 60.2594, GNorm = 0.7892, lr_0 = 4.8894e-04
Loss = 1.0142e-01, PNorm = 60.2721, GNorm = 0.9569, lr_0 = 4.8861e-04
Loss = 9.8236e-02, PNorm = 60.2853, GNorm = 0.8803, lr_0 = 4.8827e-04
Loss = 1.0661e-01, PNorm = 60.2985, GNorm = 0.6260, lr_0 = 4.8794e-04
Loss = 9.4689e-02, PNorm = 60.3084, GNorm = 0.6531, lr_0 = 4.8760e-04
Loss = 8.8999e-02, PNorm = 60.3178, GNorm = 0.8783, lr_0 = 4.8727e-04
Loss = 9.8188e-02, PNorm = 60.3280, GNorm = 0.7776, lr_0 = 4.8693e-04
Loss = 8.7035e-02, PNorm = 60.3425, GNorm = 1.2698, lr_0 = 4.8660e-04
Loss = 9.4216e-02, PNorm = 60.3559, GNorm = 0.6939, lr_0 = 4.8627e-04
Loss = 8.5490e-02, PNorm = 60.3710, GNorm = 0.6732, lr_0 = 4.8593e-04
Loss = 9.4632e-02, PNorm = 60.3794, GNorm = 0.7166, lr_0 = 4.8560e-04
Loss = 1.0085e-01, PNorm = 60.3864, GNorm = 0.9810, lr_0 = 4.8527e-04
Loss = 1.0040e-01, PNorm = 60.3921, GNorm = 0.6804, lr_0 = 4.8494e-04
Loss = 9.6584e-02, PNorm = 60.3998, GNorm = 0.5391, lr_0 = 4.8460e-04
Loss = 9.0671e-02, PNorm = 60.4082, GNorm = 0.5408, lr_0 = 4.8427e-04
Loss = 9.6608e-02, PNorm = 60.4184, GNorm = 1.6953, lr_0 = 4.8394e-04
Loss = 1.0198e-01, PNorm = 60.4324, GNorm = 0.7772, lr_0 = 4.8361e-04
Loss = 1.0731e-01, PNorm = 60.4468, GNorm = 0.4625, lr_0 = 4.8328e-04
Loss = 9.6009e-02, PNorm = 60.4607, GNorm = 0.4970, lr_0 = 4.8295e-04
Loss = 8.6225e-02, PNorm = 60.4719, GNorm = 0.5000, lr_0 = 4.8262e-04
Loss = 8.6652e-02, PNorm = 60.4842, GNorm = 0.5153, lr_0 = 4.8228e-04
Loss = 8.3140e-02, PNorm = 60.4912, GNorm = 0.6215, lr_0 = 4.8195e-04
Loss = 8.9625e-02, PNorm = 60.4990, GNorm = 0.8628, lr_0 = 4.8162e-04
Loss = 1.0119e-01, PNorm = 60.5090, GNorm = 0.8506, lr_0 = 4.8129e-04
Loss = 9.8335e-02, PNorm = 60.5179, GNorm = 0.6683, lr_0 = 4.8096e-04
Loss = 9.3760e-02, PNorm = 60.5260, GNorm = 0.6929, lr_0 = 4.8064e-04
Loss = 9.3036e-02, PNorm = 60.5363, GNorm = 0.8445, lr_0 = 4.8031e-04
Loss = 8.5826e-02, PNorm = 60.5484, GNorm = 0.7891, lr_0 = 4.7998e-04
Loss = 9.8874e-02, PNorm = 60.5575, GNorm = 0.8539, lr_0 = 4.7965e-04
Loss = 9.1917e-02, PNorm = 60.5670, GNorm = 0.5993, lr_0 = 4.7932e-04
Loss = 1.0183e-01, PNorm = 60.5824, GNorm = 0.6660, lr_0 = 4.7899e-04
Loss = 9.1598e-02, PNorm = 60.5966, GNorm = 0.9006, lr_0 = 4.7866e-04
Loss = 8.0319e-02, PNorm = 60.6080, GNorm = 0.7613, lr_0 = 4.7833e-04
Loss = 9.6926e-02, PNorm = 60.6162, GNorm = 1.2905, lr_0 = 4.7801e-04
Loss = 8.6182e-02, PNorm = 60.6258, GNorm = 0.7725, lr_0 = 4.7768e-04
Loss = 8.7076e-02, PNorm = 60.6318, GNorm = 0.9092, lr_0 = 4.7735e-04
Loss = 8.7696e-02, PNorm = 60.6407, GNorm = 0.5882, lr_0 = 4.7703e-04
Validation mae = 0.399446
Epoch 11
Loss = 8.6773e-02, PNorm = 60.6533, GNorm = 0.8146, lr_0 = 4.7670e-04
Loss = 9.8247e-02, PNorm = 60.6679, GNorm = 0.8819, lr_0 = 4.7637e-04
Loss = 9.2229e-02, PNorm = 60.6821, GNorm = 0.5330, lr_0 = 4.7605e-04
Loss = 7.3376e-02, PNorm = 60.6981, GNorm = 0.5554, lr_0 = 4.7572e-04
Loss = 8.0840e-02, PNorm = 60.7152, GNorm = 0.8495, lr_0 = 4.7539e-04
Loss = 8.6043e-02, PNorm = 60.7322, GNorm = 0.7028, lr_0 = 4.7507e-04
Loss = 1.0110e-01, PNorm = 60.7435, GNorm = 1.1833, lr_0 = 4.7474e-04
Loss = 8.3924e-02, PNorm = 60.7533, GNorm = 0.7901, lr_0 = 4.7442e-04
Loss = 7.9510e-02, PNorm = 60.7631, GNorm = 0.4702, lr_0 = 4.7409e-04
Loss = 8.7892e-02, PNorm = 60.7732, GNorm = 0.9883, lr_0 = 4.7377e-04
Loss = 9.9414e-02, PNorm = 60.7862, GNorm = 0.7800, lr_0 = 4.7344e-04
Loss = 8.7308e-02, PNorm = 60.7956, GNorm = 0.8692, lr_0 = 4.7312e-04
Loss = 8.9959e-02, PNorm = 60.8084, GNorm = 0.6741, lr_0 = 4.7279e-04
Loss = 9.3803e-02, PNorm = 60.8228, GNorm = 0.7781, lr_0 = 4.7247e-04
Loss = 7.9719e-02, PNorm = 60.8348, GNorm = 0.6819, lr_0 = 4.7215e-04
Loss = 7.7500e-02, PNorm = 60.8422, GNorm = 0.5409, lr_0 = 4.7182e-04
Loss = 9.9264e-02, PNorm = 60.8527, GNorm = 1.0490, lr_0 = 4.7150e-04
Loss = 7.8572e-02, PNorm = 60.8640, GNorm = 1.5607, lr_0 = 4.7118e-04
Loss = 1.0031e-01, PNorm = 60.8806, GNorm = 0.6674, lr_0 = 4.7085e-04
Loss = 8.1872e-02, PNorm = 60.8919, GNorm = 0.6056, lr_0 = 4.7053e-04
Loss = 8.1694e-02, PNorm = 60.9019, GNorm = 0.8997, lr_0 = 4.7021e-04
Loss = 9.3819e-02, PNorm = 60.9130, GNorm = 0.6383, lr_0 = 4.6989e-04
Loss = 8.0220e-02, PNorm = 60.9248, GNorm = 0.6600, lr_0 = 4.6957e-04
Loss = 8.6865e-02, PNorm = 60.9307, GNorm = 1.0068, lr_0 = 4.6924e-04
Loss = 8.6503e-02, PNorm = 60.9423, GNorm = 1.1242, lr_0 = 4.6892e-04
Loss = 9.9868e-02, PNorm = 60.9617, GNorm = 0.6962, lr_0 = 4.6860e-04
Loss = 9.5722e-02, PNorm = 60.9783, GNorm = 0.5041, lr_0 = 4.6828e-04
Loss = 8.2973e-02, PNorm = 60.9874, GNorm = 0.7027, lr_0 = 4.6796e-04
Loss = 8.1162e-02, PNorm = 60.9908, GNorm = 0.5671, lr_0 = 4.6764e-04
Loss = 8.9248e-02, PNorm = 61.0045, GNorm = 0.6466, lr_0 = 4.6732e-04
Loss = 9.0113e-02, PNorm = 61.0199, GNorm = 0.5132, lr_0 = 4.6700e-04
Loss = 7.2881e-02, PNorm = 61.0332, GNorm = 0.5625, lr_0 = 4.6668e-04
Loss = 9.3797e-02, PNorm = 61.0412, GNorm = 0.7737, lr_0 = 4.6636e-04
Loss = 8.1150e-02, PNorm = 61.0524, GNorm = 0.8236, lr_0 = 4.6604e-04
Loss = 9.4475e-02, PNorm = 61.0655, GNorm = 0.6854, lr_0 = 4.6572e-04
Loss = 7.7119e-02, PNorm = 61.0772, GNorm = 0.4987, lr_0 = 4.6540e-04
Loss = 8.1101e-02, PNorm = 61.0867, GNorm = 0.5491, lr_0 = 4.6508e-04
Loss = 9.8032e-02, PNorm = 61.0999, GNorm = 0.7163, lr_0 = 4.6476e-04
Loss = 9.2610e-02, PNorm = 61.1104, GNorm = 0.8747, lr_0 = 4.6445e-04
Loss = 1.0270e-01, PNorm = 61.1198, GNorm = 0.7091, lr_0 = 4.6413e-04
Loss = 9.4771e-02, PNorm = 61.1330, GNorm = 0.7542, lr_0 = 4.6381e-04
Loss = 7.6302e-02, PNorm = 61.1468, GNorm = 0.8948, lr_0 = 4.6349e-04
Loss = 8.5422e-02, PNorm = 61.1601, GNorm = 0.6664, lr_0 = 4.6317e-04
Loss = 9.1622e-02, PNorm = 61.1709, GNorm = 0.9844, lr_0 = 4.6286e-04
Loss = 8.6473e-02, PNorm = 61.1781, GNorm = 0.8289, lr_0 = 4.6254e-04
Loss = 1.0344e-01, PNorm = 61.1900, GNorm = 0.7010, lr_0 = 4.6222e-04
Loss = 9.3999e-02, PNorm = 61.2049, GNorm = 0.8094, lr_0 = 4.6191e-04
Loss = 9.7400e-02, PNorm = 61.2175, GNorm = 0.5744, lr_0 = 4.6159e-04
Loss = 8.6320e-02, PNorm = 61.2279, GNorm = 0.7817, lr_0 = 4.6127e-04
Loss = 9.4662e-02, PNorm = 61.2400, GNorm = 0.8657, lr_0 = 4.6096e-04
Loss = 8.4889e-02, PNorm = 61.2506, GNorm = 1.0936, lr_0 = 4.6064e-04
Loss = 8.3343e-02, PNorm = 61.2641, GNorm = 0.5344, lr_0 = 4.6033e-04
Loss = 8.9335e-02, PNorm = 61.2803, GNorm = 0.5078, lr_0 = 4.6001e-04
Loss = 8.2241e-02, PNorm = 61.2923, GNorm = 0.8036, lr_0 = 4.5970e-04
Loss = 9.1935e-02, PNorm = 61.2990, GNorm = 0.6287, lr_0 = 4.5938e-04
Loss = 7.7187e-02, PNorm = 61.3054, GNorm = 0.4684, lr_0 = 4.5907e-04
Loss = 8.6337e-02, PNorm = 61.3163, GNorm = 0.5092, lr_0 = 4.5875e-04
Loss = 8.3272e-02, PNorm = 61.3265, GNorm = 1.0442, lr_0 = 4.5844e-04
Loss = 8.6923e-02, PNorm = 61.3361, GNorm = 0.6139, lr_0 = 4.5812e-04
Loss = 9.2027e-02, PNorm = 61.3468, GNorm = 0.8351, lr_0 = 4.5781e-04
Loss = 9.0198e-02, PNorm = 61.3593, GNorm = 0.8412, lr_0 = 4.5750e-04
Loss = 9.2507e-02, PNorm = 61.3717, GNorm = 0.5328, lr_0 = 4.5718e-04
Loss = 9.0915e-02, PNorm = 61.3851, GNorm = 1.0142, lr_0 = 4.5687e-04
Loss = 7.8630e-02, PNorm = 61.3923, GNorm = 0.9508, lr_0 = 4.5656e-04
Loss = 9.3783e-02, PNorm = 61.3992, GNorm = 0.4440, lr_0 = 4.5624e-04
Loss = 7.5071e-02, PNorm = 61.4116, GNorm = 0.7626, lr_0 = 4.5593e-04
Loss = 7.8756e-02, PNorm = 61.4253, GNorm = 0.7740, lr_0 = 4.5562e-04
Loss = 8.8721e-02, PNorm = 61.4336, GNorm = 0.9948, lr_0 = 4.5531e-04
Loss = 8.9927e-02, PNorm = 61.4414, GNorm = 0.7132, lr_0 = 4.5499e-04
Loss = 7.9261e-02, PNorm = 61.4499, GNorm = 0.6549, lr_0 = 4.5468e-04
Loss = 8.0712e-02, PNorm = 61.4565, GNorm = 0.5979, lr_0 = 4.5437e-04
Loss = 8.4717e-02, PNorm = 61.4659, GNorm = 0.6632, lr_0 = 4.5406e-04
Loss = 8.1874e-02, PNorm = 61.4724, GNorm = 1.1634, lr_0 = 4.5375e-04
Loss = 1.0116e-01, PNorm = 61.4829, GNorm = 0.4967, lr_0 = 4.5344e-04
Loss = 8.1849e-02, PNorm = 61.4942, GNorm = 1.1084, lr_0 = 4.5313e-04
Loss = 7.9119e-02, PNorm = 61.5017, GNorm = 0.7788, lr_0 = 4.5282e-04
Loss = 8.7542e-02, PNorm = 61.5099, GNorm = 0.8682, lr_0 = 4.5251e-04
Loss = 9.7753e-02, PNorm = 61.5215, GNorm = 1.1106, lr_0 = 4.5220e-04
Loss = 8.6047e-02, PNorm = 61.5332, GNorm = 0.7195, lr_0 = 4.5189e-04
Loss = 8.8215e-02, PNorm = 61.5450, GNorm = 0.5372, lr_0 = 4.5158e-04
Loss = 9.3324e-02, PNorm = 61.5528, GNorm = 0.8137, lr_0 = 4.5127e-04
Loss = 9.4465e-02, PNorm = 61.5627, GNorm = 0.9814, lr_0 = 4.5096e-04
Loss = 9.9452e-02, PNorm = 61.5766, GNorm = 1.2845, lr_0 = 4.5065e-04
Loss = 9.7027e-02, PNorm = 61.5894, GNorm = 0.6340, lr_0 = 4.5034e-04
Loss = 9.7316e-02, PNorm = 61.5967, GNorm = 0.6287, lr_0 = 4.5003e-04
Loss = 8.4160e-02, PNorm = 61.6077, GNorm = 0.6669, lr_0 = 4.4972e-04
Loss = 9.0536e-02, PNorm = 61.6197, GNorm = 0.7500, lr_0 = 4.4942e-04
Loss = 7.8705e-02, PNorm = 61.6315, GNorm = 0.6445, lr_0 = 4.4911e-04
Loss = 9.3713e-02, PNorm = 61.6388, GNorm = 0.6616, lr_0 = 4.4880e-04
Loss = 8.3801e-02, PNorm = 61.6495, GNorm = 1.0185, lr_0 = 4.4849e-04
Loss = 9.2183e-02, PNorm = 61.6629, GNorm = 1.0760, lr_0 = 4.4819e-04
Loss = 9.2634e-02, PNorm = 61.6747, GNorm = 0.5032, lr_0 = 4.4788e-04
Loss = 8.2499e-02, PNorm = 61.6881, GNorm = 0.5259, lr_0 = 4.4757e-04
Loss = 7.7963e-02, PNorm = 61.6983, GNorm = 1.2020, lr_0 = 4.4727e-04
Loss = 8.9506e-02, PNorm = 61.7086, GNorm = 0.6983, lr_0 = 4.4696e-04
Loss = 9.4205e-02, PNorm = 61.7199, GNorm = 1.0256, lr_0 = 4.4665e-04
Loss = 8.2601e-02, PNorm = 61.7249, GNorm = 1.2722, lr_0 = 4.4635e-04
Loss = 8.9121e-02, PNorm = 61.7304, GNorm = 0.8150, lr_0 = 4.4604e-04
Loss = 9.0643e-02, PNorm = 61.7397, GNorm = 0.6510, lr_0 = 4.4574e-04
Loss = 8.3076e-02, PNorm = 61.7477, GNorm = 0.4502, lr_0 = 4.4543e-04
Loss = 9.7138e-02, PNorm = 61.7550, GNorm = 0.7946, lr_0 = 4.4513e-04
Loss = 7.3963e-02, PNorm = 61.7672, GNorm = 0.8917, lr_0 = 4.4482e-04
Loss = 9.3503e-02, PNorm = 61.7771, GNorm = 0.6755, lr_0 = 4.4452e-04
Loss = 9.7386e-02, PNorm = 61.7883, GNorm = 0.8977, lr_0 = 4.4421e-04
Loss = 7.9132e-02, PNorm = 61.7994, GNorm = 0.7805, lr_0 = 4.4391e-04
Loss = 8.2878e-02, PNorm = 61.8041, GNorm = 0.8146, lr_0 = 4.4360e-04
Loss = 1.0347e-01, PNorm = 61.8127, GNorm = 0.5912, lr_0 = 4.4330e-04
Loss = 8.8790e-02, PNorm = 61.8232, GNorm = 0.6752, lr_0 = 4.4299e-04
Loss = 8.3642e-02, PNorm = 61.8318, GNorm = 0.6914, lr_0 = 4.4269e-04
Loss = 7.7791e-02, PNorm = 61.8376, GNorm = 0.7999, lr_0 = 4.4239e-04
Loss = 7.9974e-02, PNorm = 61.8437, GNorm = 0.8178, lr_0 = 4.4209e-04
Loss = 7.9684e-02, PNorm = 61.8497, GNorm = 0.7393, lr_0 = 4.4178e-04
Loss = 1.0265e-01, PNorm = 61.8590, GNorm = 0.6628, lr_0 = 4.4148e-04
Loss = 8.5355e-02, PNorm = 61.8676, GNorm = 0.6234, lr_0 = 4.4118e-04
Loss = 1.0866e-01, PNorm = 61.8795, GNorm = 0.4489, lr_0 = 4.4088e-04
Loss = 8.9431e-02, PNorm = 61.8878, GNorm = 0.5640, lr_0 = 4.4057e-04
Loss = 9.6394e-02, PNorm = 61.8949, GNorm = 0.7686, lr_0 = 4.4027e-04
Loss = 8.8911e-02, PNorm = 61.9028, GNorm = 0.9038, lr_0 = 4.3997e-04
Loss = 9.0443e-02, PNorm = 61.9132, GNorm = 0.5687, lr_0 = 4.3967e-04
Loss = 8.9151e-02, PNorm = 61.9243, GNorm = 0.4387, lr_0 = 4.3937e-04
Validation mae = 0.396524
Epoch 12
Loss = 7.6227e-02, PNorm = 61.9375, GNorm = 0.6867, lr_0 = 4.3907e-04
Loss = 6.7025e-02, PNorm = 61.9507, GNorm = 0.6251, lr_0 = 4.3877e-04
Loss = 7.8645e-02, PNorm = 61.9614, GNorm = 0.5432, lr_0 = 4.3846e-04
Loss = 8.1745e-02, PNorm = 61.9729, GNorm = 0.4174, lr_0 = 4.3816e-04
Loss = 8.2867e-02, PNorm = 61.9857, GNorm = 0.8497, lr_0 = 4.3786e-04
Loss = 8.1353e-02, PNorm = 61.9966, GNorm = 0.6618, lr_0 = 4.3756e-04
Loss = 7.2591e-02, PNorm = 62.0044, GNorm = 0.6869, lr_0 = 4.3726e-04
Loss = 8.1384e-02, PNorm = 62.0135, GNorm = 0.6734, lr_0 = 4.3696e-04
Loss = 7.3099e-02, PNorm = 62.0232, GNorm = 0.8772, lr_0 = 4.3667e-04
Loss = 8.1629e-02, PNorm = 62.0373, GNorm = 0.4425, lr_0 = 4.3637e-04
Loss = 7.0254e-02, PNorm = 62.0497, GNorm = 0.8564, lr_0 = 4.3607e-04
Loss = 8.6693e-02, PNorm = 62.0590, GNorm = 0.5185, lr_0 = 4.3577e-04
Loss = 6.2584e-02, PNorm = 62.0690, GNorm = 0.4654, lr_0 = 4.3547e-04
Loss = 8.5781e-02, PNorm = 62.0792, GNorm = 0.6699, lr_0 = 4.3517e-04
Loss = 7.5983e-02, PNorm = 62.0915, GNorm = 0.4514, lr_0 = 4.3487e-04
Loss = 8.1998e-02, PNorm = 62.1064, GNorm = 0.6260, lr_0 = 4.3458e-04
Loss = 7.5939e-02, PNorm = 62.1169, GNorm = 0.6003, lr_0 = 4.3428e-04
Loss = 8.9388e-02, PNorm = 62.1247, GNorm = 0.6048, lr_0 = 4.3398e-04
Loss = 7.5508e-02, PNorm = 62.1307, GNorm = 0.5871, lr_0 = 4.3368e-04
Loss = 8.3598e-02, PNorm = 62.1393, GNorm = 0.6438, lr_0 = 4.3339e-04
Loss = 8.9119e-02, PNorm = 62.1519, GNorm = 0.8185, lr_0 = 4.3309e-04
Loss = 8.7817e-02, PNorm = 62.1656, GNorm = 0.6292, lr_0 = 4.3279e-04
Loss = 8.2774e-02, PNorm = 62.1760, GNorm = 0.7235, lr_0 = 4.3250e-04
Loss = 8.1305e-02, PNorm = 62.1885, GNorm = 0.9257, lr_0 = 4.3220e-04
Loss = 7.4396e-02, PNorm = 62.1982, GNorm = 0.6555, lr_0 = 4.3190e-04
Loss = 7.9350e-02, PNorm = 62.2072, GNorm = 0.5442, lr_0 = 4.3161e-04
Loss = 7.5250e-02, PNorm = 62.2165, GNorm = 0.7335, lr_0 = 4.3131e-04
Loss = 7.0883e-02, PNorm = 62.2224, GNorm = 0.5162, lr_0 = 4.3102e-04
Loss = 9.5456e-02, PNorm = 62.2353, GNorm = 0.8646, lr_0 = 4.3072e-04
Loss = 7.9640e-02, PNorm = 62.2480, GNorm = 0.6834, lr_0 = 4.3043e-04
Loss = 8.9490e-02, PNorm = 62.2636, GNorm = 0.8542, lr_0 = 4.3013e-04
Loss = 8.3487e-02, PNorm = 62.2784, GNorm = 1.1491, lr_0 = 4.2984e-04
Loss = 8.1265e-02, PNorm = 62.2915, GNorm = 0.6576, lr_0 = 4.2954e-04
Loss = 8.1711e-02, PNorm = 62.3007, GNorm = 0.5841, lr_0 = 4.2925e-04
Loss = 7.8861e-02, PNorm = 62.3112, GNorm = 0.6819, lr_0 = 4.2895e-04
Loss = 7.6577e-02, PNorm = 62.3215, GNorm = 0.7370, lr_0 = 4.2866e-04
Loss = 7.5216e-02, PNorm = 62.3301, GNorm = 0.5025, lr_0 = 4.2837e-04
Loss = 8.7202e-02, PNorm = 62.3395, GNorm = 0.6703, lr_0 = 4.2807e-04
Loss = 7.3097e-02, PNorm = 62.3496, GNorm = 0.7592, lr_0 = 4.2778e-04
Loss = 7.4187e-02, PNorm = 62.3556, GNorm = 0.6429, lr_0 = 4.2749e-04
Loss = 8.2922e-02, PNorm = 62.3647, GNorm = 0.4918, lr_0 = 4.2719e-04
Loss = 9.0553e-02, PNorm = 62.3695, GNorm = 0.5411, lr_0 = 4.2690e-04
Loss = 7.6630e-02, PNorm = 62.3781, GNorm = 0.8209, lr_0 = 4.2661e-04
Loss = 7.3935e-02, PNorm = 62.3846, GNorm = 0.4558, lr_0 = 4.2632e-04
Loss = 9.3073e-02, PNorm = 62.3934, GNorm = 0.7895, lr_0 = 4.2602e-04
Loss = 9.2393e-02, PNorm = 62.4062, GNorm = 0.7333, lr_0 = 4.2573e-04
Loss = 8.1668e-02, PNorm = 62.4176, GNorm = 0.8594, lr_0 = 4.2544e-04
Loss = 7.9179e-02, PNorm = 62.4223, GNorm = 0.5431, lr_0 = 4.2515e-04
Loss = 8.9670e-02, PNorm = 62.4307, GNorm = 0.5938, lr_0 = 4.2486e-04
Loss = 9.2538e-02, PNorm = 62.4446, GNorm = 0.6284, lr_0 = 4.2457e-04
Loss = 8.1741e-02, PNorm = 62.4533, GNorm = 1.2359, lr_0 = 4.2428e-04
Loss = 9.6774e-02, PNorm = 62.4602, GNorm = 0.5154, lr_0 = 4.2399e-04
Loss = 1.0166e-01, PNorm = 62.4704, GNorm = 0.6927, lr_0 = 4.2370e-04
Loss = 8.7538e-02, PNorm = 62.4792, GNorm = 0.8461, lr_0 = 4.2340e-04
Loss = 7.5704e-02, PNorm = 62.4902, GNorm = 0.7895, lr_0 = 4.2311e-04
Loss = 8.6670e-02, PNorm = 62.5004, GNorm = 0.4147, lr_0 = 4.2283e-04
Loss = 9.2465e-02, PNorm = 62.5108, GNorm = 0.7383, lr_0 = 4.2254e-04
Loss = 8.2509e-02, PNorm = 62.5202, GNorm = 0.7280, lr_0 = 4.2225e-04
Loss = 8.3695e-02, PNorm = 62.5273, GNorm = 0.6918, lr_0 = 4.2196e-04
Loss = 7.6254e-02, PNorm = 62.5387, GNorm = 0.8615, lr_0 = 4.2167e-04
Loss = 8.7932e-02, PNorm = 62.5484, GNorm = 0.8657, lr_0 = 4.2138e-04
Loss = 8.5281e-02, PNorm = 62.5581, GNorm = 0.3903, lr_0 = 4.2109e-04
Loss = 7.8105e-02, PNorm = 62.5681, GNorm = 0.5700, lr_0 = 4.2080e-04
Loss = 8.4793e-02, PNorm = 62.5773, GNorm = 0.5671, lr_0 = 4.2051e-04
Loss = 8.0896e-02, PNorm = 62.5851, GNorm = 0.7084, lr_0 = 4.2023e-04
Loss = 8.3122e-02, PNorm = 62.5916, GNorm = 1.0944, lr_0 = 4.1994e-04
Loss = 7.5606e-02, PNorm = 62.6010, GNorm = 0.5529, lr_0 = 4.1965e-04
Loss = 7.0970e-02, PNorm = 62.6105, GNorm = 0.5015, lr_0 = 4.1936e-04
Loss = 8.2677e-02, PNorm = 62.6206, GNorm = 0.9339, lr_0 = 4.1907e-04
Loss = 8.9112e-02, PNorm = 62.6320, GNorm = 0.5969, lr_0 = 4.1879e-04
Loss = 8.7656e-02, PNorm = 62.6409, GNorm = 0.5091, lr_0 = 4.1850e-04
Loss = 8.0683e-02, PNorm = 62.6465, GNorm = 0.5667, lr_0 = 4.1821e-04
Loss = 9.4439e-02, PNorm = 62.6508, GNorm = 0.6840, lr_0 = 4.1793e-04
Loss = 9.1071e-02, PNorm = 62.6636, GNorm = 0.7703, lr_0 = 4.1764e-04
Loss = 8.0916e-02, PNorm = 62.6749, GNorm = 0.5346, lr_0 = 4.1736e-04
Loss = 7.5554e-02, PNorm = 62.6876, GNorm = 0.8472, lr_0 = 4.1707e-04
Loss = 7.9040e-02, PNorm = 62.6984, GNorm = 0.4118, lr_0 = 4.1678e-04
Loss = 8.2074e-02, PNorm = 62.7108, GNorm = 0.4469, lr_0 = 4.1650e-04
Loss = 8.9824e-02, PNorm = 62.7226, GNorm = 0.7373, lr_0 = 4.1621e-04
Loss = 8.5909e-02, PNorm = 62.7287, GNorm = 0.7722, lr_0 = 4.1593e-04
Loss = 8.9825e-02, PNorm = 62.7340, GNorm = 1.3255, lr_0 = 4.1564e-04
Loss = 8.5345e-02, PNorm = 62.7411, GNorm = 1.1725, lr_0 = 4.1536e-04
Loss = 8.8327e-02, PNorm = 62.7496, GNorm = 0.5484, lr_0 = 4.1507e-04
Loss = 7.7947e-02, PNorm = 62.7572, GNorm = 0.5794, lr_0 = 4.1479e-04
Loss = 7.2700e-02, PNorm = 62.7681, GNorm = 0.9611, lr_0 = 4.1450e-04
Loss = 7.8918e-02, PNorm = 62.7771, GNorm = 0.8338, lr_0 = 4.1422e-04
Loss = 8.2719e-02, PNorm = 62.7845, GNorm = 0.5030, lr_0 = 4.1394e-04
Loss = 8.3054e-02, PNorm = 62.7905, GNorm = 0.6585, lr_0 = 4.1365e-04
Loss = 6.8423e-02, PNorm = 62.7998, GNorm = 0.4759, lr_0 = 4.1337e-04
Loss = 9.4617e-02, PNorm = 62.8115, GNorm = 1.0081, lr_0 = 4.1309e-04
Loss = 9.2214e-02, PNorm = 62.8226, GNorm = 1.8216, lr_0 = 4.1280e-04
Loss = 6.9381e-02, PNorm = 62.8338, GNorm = 0.6700, lr_0 = 4.1252e-04
Loss = 7.2008e-02, PNorm = 62.8442, GNorm = 0.5182, lr_0 = 4.1224e-04
Loss = 8.0798e-02, PNorm = 62.8554, GNorm = 0.6708, lr_0 = 4.1196e-04
Loss = 7.4291e-02, PNorm = 62.8642, GNorm = 0.8347, lr_0 = 4.1167e-04
Loss = 7.5816e-02, PNorm = 62.8724, GNorm = 0.6736, lr_0 = 4.1139e-04
Loss = 8.2457e-02, PNorm = 62.8800, GNorm = 0.5343, lr_0 = 4.1111e-04
Loss = 7.9819e-02, PNorm = 62.8903, GNorm = 0.7275, lr_0 = 4.1083e-04
Loss = 8.3374e-02, PNorm = 62.9007, GNorm = 0.6136, lr_0 = 4.1055e-04
Loss = 8.5030e-02, PNorm = 62.9159, GNorm = 0.6986, lr_0 = 4.1027e-04
Loss = 8.3672e-02, PNorm = 62.9267, GNorm = 0.4348, lr_0 = 4.0998e-04
Loss = 8.9335e-02, PNorm = 62.9363, GNorm = 0.6232, lr_0 = 4.0970e-04
Loss = 7.6798e-02, PNorm = 62.9411, GNorm = 0.4787, lr_0 = 4.0942e-04
Loss = 7.5542e-02, PNorm = 62.9493, GNorm = 0.6062, lr_0 = 4.0914e-04
Loss = 7.4938e-02, PNorm = 62.9551, GNorm = 0.6153, lr_0 = 4.0886e-04
Loss = 7.9204e-02, PNorm = 62.9614, GNorm = 0.7349, lr_0 = 4.0858e-04
Loss = 9.5881e-02, PNorm = 62.9683, GNorm = 0.8361, lr_0 = 4.0830e-04
Loss = 8.0323e-02, PNorm = 62.9763, GNorm = 0.5240, lr_0 = 4.0802e-04
Loss = 9.0167e-02, PNorm = 62.9835, GNorm = 0.5623, lr_0 = 4.0774e-04
Loss = 9.0579e-02, PNorm = 62.9906, GNorm = 0.5849, lr_0 = 4.0746e-04
Loss = 8.4612e-02, PNorm = 63.0041, GNorm = 0.5559, lr_0 = 4.0718e-04
Loss = 8.5777e-02, PNorm = 63.0148, GNorm = 0.5994, lr_0 = 4.0691e-04
Loss = 7.3500e-02, PNorm = 63.0256, GNorm = 0.5239, lr_0 = 4.0663e-04
Loss = 7.9203e-02, PNorm = 63.0307, GNorm = 0.5806, lr_0 = 4.0635e-04
Loss = 8.1732e-02, PNorm = 63.0406, GNorm = 0.9791, lr_0 = 4.0607e-04
Loss = 7.9203e-02, PNorm = 63.0497, GNorm = 0.5754, lr_0 = 4.0579e-04
Loss = 9.4485e-02, PNorm = 63.0582, GNorm = 1.1029, lr_0 = 4.0551e-04
Loss = 7.7710e-02, PNorm = 63.0621, GNorm = 0.4480, lr_0 = 4.0524e-04
Loss = 8.6846e-02, PNorm = 63.0654, GNorm = 0.7163, lr_0 = 4.0496e-04
Loss = 8.4627e-02, PNorm = 63.0695, GNorm = 0.9439, lr_0 = 4.0468e-04
Validation mae = 0.391913
Epoch 13
Loss = 6.8953e-02, PNorm = 63.0780, GNorm = 0.6055, lr_0 = 4.0440e-04
Loss = 6.3143e-02, PNorm = 63.0881, GNorm = 0.7213, lr_0 = 4.0413e-04
Loss = 6.9375e-02, PNorm = 63.0968, GNorm = 0.4657, lr_0 = 4.0385e-04
Loss = 7.7856e-02, PNorm = 63.1057, GNorm = 0.7244, lr_0 = 4.0357e-04
Loss = 6.5959e-02, PNorm = 63.1130, GNorm = 0.4717, lr_0 = 4.0330e-04
Loss = 6.9499e-02, PNorm = 63.1200, GNorm = 0.5148, lr_0 = 4.0302e-04
Loss = 7.4266e-02, PNorm = 63.1294, GNorm = 0.6267, lr_0 = 4.0274e-04
Loss = 7.8918e-02, PNorm = 63.1405, GNorm = 1.0807, lr_0 = 4.0247e-04
Loss = 7.7382e-02, PNorm = 63.1503, GNorm = 0.9489, lr_0 = 4.0219e-04
Loss = 7.3926e-02, PNorm = 63.1592, GNorm = 0.5178, lr_0 = 4.0192e-04
Loss = 6.1715e-02, PNorm = 63.1655, GNorm = 0.5842, lr_0 = 4.0164e-04
Loss = 7.4973e-02, PNorm = 63.1734, GNorm = 0.9800, lr_0 = 4.0137e-04
Loss = 8.1524e-02, PNorm = 63.1829, GNorm = 0.6062, lr_0 = 4.0109e-04
Loss = 7.2600e-02, PNorm = 63.1923, GNorm = 0.6928, lr_0 = 4.0082e-04
Loss = 6.8440e-02, PNorm = 63.2021, GNorm = 0.7393, lr_0 = 4.0054e-04
Loss = 6.8914e-02, PNorm = 63.2133, GNorm = 0.4924, lr_0 = 4.0027e-04
Loss = 6.6669e-02, PNorm = 63.2229, GNorm = 0.5025, lr_0 = 3.9999e-04
Loss = 7.1882e-02, PNorm = 63.2296, GNorm = 0.6215, lr_0 = 3.9972e-04
Loss = 7.4991e-02, PNorm = 63.2342, GNorm = 0.5162, lr_0 = 3.9945e-04
Loss = 6.4316e-02, PNorm = 63.2442, GNorm = 0.4376, lr_0 = 3.9917e-04
Loss = 7.0187e-02, PNorm = 63.2556, GNorm = 0.7173, lr_0 = 3.9890e-04
Loss = 7.3298e-02, PNorm = 63.2623, GNorm = 0.3576, lr_0 = 3.9863e-04
Loss = 8.4468e-02, PNorm = 63.2710, GNorm = 0.7581, lr_0 = 3.9835e-04
Loss = 7.9898e-02, PNorm = 63.2800, GNorm = 0.5557, lr_0 = 3.9808e-04
Loss = 6.7208e-02, PNorm = 63.2890, GNorm = 0.6434, lr_0 = 3.9781e-04
Loss = 6.2896e-02, PNorm = 63.2978, GNorm = 0.4792, lr_0 = 3.9753e-04
Loss = 8.7960e-02, PNorm = 63.3069, GNorm = 0.6131, lr_0 = 3.9726e-04
Loss = 8.8172e-02, PNorm = 63.3134, GNorm = 0.4417, lr_0 = 3.9699e-04
Loss = 7.6341e-02, PNorm = 63.3213, GNorm = 0.4840, lr_0 = 3.9672e-04
Loss = 6.1451e-02, PNorm = 63.3271, GNorm = 0.4248, lr_0 = 3.9645e-04
Loss = 7.8647e-02, PNorm = 63.3358, GNorm = 0.6944, lr_0 = 3.9617e-04
Loss = 8.1035e-02, PNorm = 63.3467, GNorm = 0.5352, lr_0 = 3.9590e-04
Loss = 6.9248e-02, PNorm = 63.3560, GNorm = 0.6598, lr_0 = 3.9563e-04
Loss = 7.8552e-02, PNorm = 63.3693, GNorm = 0.7030, lr_0 = 3.9536e-04
Loss = 7.8392e-02, PNorm = 63.3825, GNorm = 0.9710, lr_0 = 3.9509e-04
Loss = 7.5731e-02, PNorm = 63.3888, GNorm = 0.5903, lr_0 = 3.9482e-04
Loss = 7.3273e-02, PNorm = 63.3968, GNorm = 1.0784, lr_0 = 3.9455e-04
Loss = 7.3348e-02, PNorm = 63.4076, GNorm = 0.5165, lr_0 = 3.9428e-04
Loss = 7.0889e-02, PNorm = 63.4171, GNorm = 0.6410, lr_0 = 3.9401e-04
Loss = 7.1065e-02, PNorm = 63.4279, GNorm = 0.6828, lr_0 = 3.9374e-04
Loss = 7.6535e-02, PNorm = 63.4375, GNorm = 0.5592, lr_0 = 3.9347e-04
Loss = 7.8892e-02, PNorm = 63.4476, GNorm = 0.5388, lr_0 = 3.9320e-04
Loss = 7.2544e-02, PNorm = 63.4584, GNorm = 0.5162, lr_0 = 3.9293e-04
Loss = 8.3232e-02, PNorm = 63.4654, GNorm = 0.7921, lr_0 = 3.9266e-04
Loss = 6.7722e-02, PNorm = 63.4731, GNorm = 0.7949, lr_0 = 3.9239e-04
Loss = 7.0993e-02, PNorm = 63.4837, GNorm = 0.6809, lr_0 = 3.9212e-04
Loss = 8.6564e-02, PNorm = 63.4903, GNorm = 0.7225, lr_0 = 3.9185e-04
Loss = 7.9934e-02, PNorm = 63.4963, GNorm = 0.5948, lr_0 = 3.9159e-04
Loss = 6.7054e-02, PNorm = 63.5020, GNorm = 0.5488, lr_0 = 3.9132e-04
Loss = 8.0418e-02, PNorm = 63.5102, GNorm = 0.7450, lr_0 = 3.9105e-04
Loss = 7.8059e-02, PNorm = 63.5190, GNorm = 1.2089, lr_0 = 3.9078e-04
Loss = 7.0860e-02, PNorm = 63.5269, GNorm = 0.5255, lr_0 = 3.9051e-04
Loss = 8.9048e-02, PNorm = 63.5361, GNorm = 0.6212, lr_0 = 3.9025e-04
Loss = 7.9359e-02, PNorm = 63.5470, GNorm = 0.5928, lr_0 = 3.8998e-04
Loss = 7.0507e-02, PNorm = 63.5559, GNorm = 0.7345, lr_0 = 3.8971e-04
Loss = 8.7658e-02, PNorm = 63.5647, GNorm = 0.7472, lr_0 = 3.8945e-04
Loss = 6.8886e-02, PNorm = 63.5733, GNorm = 0.8063, lr_0 = 3.8918e-04
Loss = 7.5650e-02, PNorm = 63.5833, GNorm = 0.6032, lr_0 = 3.8891e-04
Loss = 6.3084e-02, PNorm = 63.5949, GNorm = 0.4301, lr_0 = 3.8865e-04
Loss = 7.5970e-02, PNorm = 63.6001, GNorm = 0.5248, lr_0 = 3.8838e-04
Loss = 7.0922e-02, PNorm = 63.6077, GNorm = 1.0254, lr_0 = 3.8811e-04
Loss = 8.2672e-02, PNorm = 63.6167, GNorm = 0.5860, lr_0 = 3.8785e-04
Loss = 7.3611e-02, PNorm = 63.6256, GNorm = 0.9811, lr_0 = 3.8758e-04
Loss = 7.2705e-02, PNorm = 63.6334, GNorm = 0.9411, lr_0 = 3.8732e-04
Loss = 8.6531e-02, PNorm = 63.6408, GNorm = 0.8217, lr_0 = 3.8705e-04
Loss = 7.8238e-02, PNorm = 63.6529, GNorm = 0.9818, lr_0 = 3.8679e-04
Loss = 7.1839e-02, PNorm = 63.6592, GNorm = 0.5748, lr_0 = 3.8652e-04
Loss = 7.2631e-02, PNorm = 63.6615, GNorm = 0.5460, lr_0 = 3.8626e-04
Loss = 9.3405e-02, PNorm = 63.6704, GNorm = 0.8857, lr_0 = 3.8599e-04
Loss = 7.6731e-02, PNorm = 63.6814, GNorm = 0.8611, lr_0 = 3.8573e-04
Loss = 8.5752e-02, PNorm = 63.6896, GNorm = 0.6015, lr_0 = 3.8546e-04
Loss = 7.4195e-02, PNorm = 63.6980, GNorm = 0.6308, lr_0 = 3.8520e-04
Loss = 9.0398e-02, PNorm = 63.7064, GNorm = 1.7410, lr_0 = 3.8493e-04
Loss = 7.9046e-02, PNorm = 63.7155, GNorm = 0.4883, lr_0 = 3.8467e-04
Loss = 8.0325e-02, PNorm = 63.7272, GNorm = 0.5133, lr_0 = 3.8441e-04
Loss = 7.5183e-02, PNorm = 63.7355, GNorm = 1.0382, lr_0 = 3.8414e-04
Loss = 7.5586e-02, PNorm = 63.7426, GNorm = 0.7289, lr_0 = 3.8388e-04
Loss = 6.8967e-02, PNorm = 63.7496, GNorm = 0.5002, lr_0 = 3.8362e-04
Loss = 7.2899e-02, PNorm = 63.7574, GNorm = 0.9894, lr_0 = 3.8336e-04
Loss = 8.4105e-02, PNorm = 63.7646, GNorm = 0.5468, lr_0 = 3.8309e-04
Loss = 7.2434e-02, PNorm = 63.7738, GNorm = 0.4788, lr_0 = 3.8283e-04
Loss = 6.1990e-02, PNorm = 63.7823, GNorm = 0.5597, lr_0 = 3.8257e-04
Loss = 7.8790e-02, PNorm = 63.7876, GNorm = 0.7913, lr_0 = 3.8231e-04
Loss = 7.5340e-02, PNorm = 63.7935, GNorm = 0.8897, lr_0 = 3.8204e-04
Loss = 8.1837e-02, PNorm = 63.8006, GNorm = 0.9962, lr_0 = 3.8178e-04
Loss = 7.2316e-02, PNorm = 63.8085, GNorm = 0.9516, lr_0 = 3.8152e-04
Loss = 7.9989e-02, PNorm = 63.8150, GNorm = 1.1998, lr_0 = 3.8126e-04
Loss = 7.2690e-02, PNorm = 63.8251, GNorm = 1.0026, lr_0 = 3.8100e-04
Loss = 7.2229e-02, PNorm = 63.8353, GNorm = 0.4578, lr_0 = 3.8074e-04
Loss = 7.6208e-02, PNorm = 63.8417, GNorm = 0.6095, lr_0 = 3.8048e-04
Loss = 6.9768e-02, PNorm = 63.8508, GNorm = 0.5669, lr_0 = 3.8022e-04
Loss = 6.3870e-02, PNorm = 63.8591, GNorm = 0.3970, lr_0 = 3.7995e-04
Loss = 7.3385e-02, PNorm = 63.8626, GNorm = 0.4422, lr_0 = 3.7969e-04
Loss = 7.7103e-02, PNorm = 63.8710, GNorm = 0.6969, lr_0 = 3.7943e-04
Loss = 7.1536e-02, PNorm = 63.8806, GNorm = 0.5637, lr_0 = 3.7917e-04
Loss = 8.4799e-02, PNorm = 63.8889, GNorm = 0.4633, lr_0 = 3.7891e-04
Loss = 7.6435e-02, PNorm = 63.8936, GNorm = 0.6239, lr_0 = 3.7866e-04
Loss = 8.3609e-02, PNorm = 63.8971, GNorm = 0.7174, lr_0 = 3.7840e-04
Loss = 8.0535e-02, PNorm = 63.9056, GNorm = 0.8726, lr_0 = 3.7814e-04
Loss = 8.5601e-02, PNorm = 63.9158, GNorm = 0.7449, lr_0 = 3.7788e-04
Loss = 7.9012e-02, PNorm = 63.9243, GNorm = 0.7219, lr_0 = 3.7762e-04
Loss = 8.4230e-02, PNorm = 63.9356, GNorm = 0.6951, lr_0 = 3.7736e-04
Loss = 8.1658e-02, PNorm = 63.9466, GNorm = 0.9661, lr_0 = 3.7710e-04
Loss = 7.5982e-02, PNorm = 63.9547, GNorm = 0.7129, lr_0 = 3.7684e-04
Loss = 7.6534e-02, PNorm = 63.9597, GNorm = 0.5468, lr_0 = 3.7659e-04
Loss = 6.8688e-02, PNorm = 63.9644, GNorm = 0.4628, lr_0 = 3.7633e-04
Loss = 8.3407e-02, PNorm = 63.9675, GNorm = 0.8812, lr_0 = 3.7607e-04
Loss = 8.5014e-02, PNorm = 63.9735, GNorm = 0.4772, lr_0 = 3.7581e-04
Loss = 7.8163e-02, PNorm = 63.9815, GNorm = 1.2242, lr_0 = 3.7555e-04
Loss = 7.5949e-02, PNorm = 63.9875, GNorm = 0.6052, lr_0 = 3.7530e-04
Loss = 7.0463e-02, PNorm = 63.9930, GNorm = 0.5437, lr_0 = 3.7504e-04
Loss = 7.9221e-02, PNorm = 64.0006, GNorm = 0.7970, lr_0 = 3.7478e-04
Loss = 8.0953e-02, PNorm = 64.0107, GNorm = 0.7235, lr_0 = 3.7453e-04
Loss = 8.6241e-02, PNorm = 64.0156, GNorm = 0.5031, lr_0 = 3.7427e-04
Loss = 7.7048e-02, PNorm = 64.0223, GNorm = 0.5672, lr_0 = 3.7401e-04
Loss = 7.7370e-02, PNorm = 64.0294, GNorm = 0.7140, lr_0 = 3.7376e-04
Loss = 7.0436e-02, PNorm = 64.0378, GNorm = 0.7522, lr_0 = 3.7350e-04
Loss = 8.1295e-02, PNorm = 64.0473, GNorm = 0.6731, lr_0 = 3.7325e-04
Loss = 8.1948e-02, PNorm = 64.0576, GNorm = 0.6564, lr_0 = 3.7299e-04
Loss = 7.7737e-02, PNorm = 64.0659, GNorm = 0.5735, lr_0 = 3.7273e-04
Validation mae = 0.402532
Epoch 14
Loss = 9.1896e-02, PNorm = 64.0738, GNorm = 0.7508, lr_0 = 3.7248e-04
Loss = 6.7479e-02, PNorm = 64.0855, GNorm = 0.4143, lr_0 = 3.7222e-04
Loss = 6.2576e-02, PNorm = 64.0959, GNorm = 0.5898, lr_0 = 3.7197e-04
Loss = 6.3148e-02, PNorm = 64.1068, GNorm = 0.4282, lr_0 = 3.7171e-04
Loss = 6.1305e-02, PNorm = 64.1184, GNorm = 0.7214, lr_0 = 3.7146e-04
Loss = 7.8347e-02, PNorm = 64.1283, GNorm = 0.6265, lr_0 = 3.7120e-04
Loss = 7.2431e-02, PNorm = 64.1394, GNorm = 1.1672, lr_0 = 3.7095e-04
Loss = 6.0566e-02, PNorm = 64.1450, GNorm = 0.4200, lr_0 = 3.7070e-04
Loss = 7.2059e-02, PNorm = 64.1528, GNorm = 0.5237, lr_0 = 3.7044e-04
Loss = 6.7675e-02, PNorm = 64.1628, GNorm = 0.9975, lr_0 = 3.7019e-04
Loss = 6.0447e-02, PNorm = 64.1742, GNorm = 1.0705, lr_0 = 3.6993e-04
Loss = 6.7876e-02, PNorm = 64.1856, GNorm = 1.1158, lr_0 = 3.6968e-04
Loss = 6.3577e-02, PNorm = 64.1951, GNorm = 0.5952, lr_0 = 3.6943e-04
Loss = 7.9033e-02, PNorm = 64.2037, GNorm = 0.6279, lr_0 = 3.6917e-04
Loss = 5.8811e-02, PNorm = 64.2115, GNorm = 0.5425, lr_0 = 3.6892e-04
Loss = 7.7817e-02, PNorm = 64.2225, GNorm = 0.5589, lr_0 = 3.6867e-04
Loss = 6.7870e-02, PNorm = 64.2307, GNorm = 0.4232, lr_0 = 3.6842e-04
Loss = 7.1189e-02, PNorm = 64.2390, GNorm = 0.6068, lr_0 = 3.6816e-04
Loss = 6.9816e-02, PNorm = 64.2473, GNorm = 0.8985, lr_0 = 3.6791e-04
Loss = 8.0034e-02, PNorm = 64.2529, GNorm = 1.2413, lr_0 = 3.6766e-04
Loss = 6.8298e-02, PNorm = 64.2592, GNorm = 0.6968, lr_0 = 3.6741e-04
Loss = 7.0107e-02, PNorm = 64.2666, GNorm = 0.4178, lr_0 = 3.6716e-04
Loss = 6.2129e-02, PNorm = 64.2742, GNorm = 0.6652, lr_0 = 3.6690e-04
Loss = 6.3042e-02, PNorm = 64.2810, GNorm = 0.6815, lr_0 = 3.6665e-04
Loss = 6.9418e-02, PNorm = 64.2904, GNorm = 0.7218, lr_0 = 3.6640e-04
Loss = 5.9942e-02, PNorm = 64.2977, GNorm = 0.6824, lr_0 = 3.6615e-04
Loss = 8.0014e-02, PNorm = 64.3058, GNorm = 0.5219, lr_0 = 3.6590e-04
Loss = 7.2450e-02, PNorm = 64.3136, GNorm = 0.5478, lr_0 = 3.6565e-04
Loss = 6.5398e-02, PNorm = 64.3240, GNorm = 0.6288, lr_0 = 3.6540e-04
Loss = 6.4988e-02, PNorm = 64.3346, GNorm = 0.7191, lr_0 = 3.6515e-04
Loss = 7.1488e-02, PNorm = 64.3425, GNorm = 0.4574, lr_0 = 3.6490e-04
Loss = 6.4224e-02, PNorm = 64.3498, GNorm = 0.4910, lr_0 = 3.6465e-04
Loss = 6.5687e-02, PNorm = 64.3556, GNorm = 0.4528, lr_0 = 3.6440e-04
Loss = 7.6678e-02, PNorm = 64.3636, GNorm = 0.4934, lr_0 = 3.6415e-04
Loss = 7.3906e-02, PNorm = 64.3720, GNorm = 0.6016, lr_0 = 3.6390e-04
Loss = 6.5533e-02, PNorm = 64.3805, GNorm = 0.7568, lr_0 = 3.6365e-04
Loss = 7.6538e-02, PNorm = 64.3868, GNorm = 0.7031, lr_0 = 3.6340e-04
Loss = 6.8943e-02, PNorm = 64.3943, GNorm = 0.7006, lr_0 = 3.6315e-04
Loss = 6.4566e-02, PNorm = 64.4023, GNorm = 0.4628, lr_0 = 3.6290e-04
Loss = 7.1867e-02, PNorm = 64.4087, GNorm = 0.5947, lr_0 = 3.6266e-04
Loss = 7.2159e-02, PNorm = 64.4148, GNorm = 0.5771, lr_0 = 3.6241e-04
Loss = 7.0682e-02, PNorm = 64.4208, GNorm = 0.8580, lr_0 = 3.6216e-04
Loss = 7.6868e-02, PNorm = 64.4275, GNorm = 0.5933, lr_0 = 3.6191e-04
Loss = 6.7181e-02, PNorm = 64.4340, GNorm = 0.4362, lr_0 = 3.6166e-04
Loss = 7.0903e-02, PNorm = 64.4419, GNorm = 0.5100, lr_0 = 3.6141e-04
Loss = 8.1582e-02, PNorm = 64.4503, GNorm = 0.5104, lr_0 = 3.6117e-04
Loss = 6.0394e-02, PNorm = 64.4585, GNorm = 0.5611, lr_0 = 3.6092e-04
Loss = 6.8340e-02, PNorm = 64.4677, GNorm = 0.6375, lr_0 = 3.6067e-04
Loss = 8.4363e-02, PNorm = 64.4760, GNorm = 0.7914, lr_0 = 3.6043e-04
Loss = 7.6275e-02, PNorm = 64.4832, GNorm = 0.4618, lr_0 = 3.6018e-04
Loss = 7.4406e-02, PNorm = 64.4896, GNorm = 0.6222, lr_0 = 3.5993e-04
Loss = 7.6457e-02, PNorm = 64.4987, GNorm = 0.6971, lr_0 = 3.5969e-04
Loss = 7.4075e-02, PNorm = 64.5095, GNorm = 0.5035, lr_0 = 3.5944e-04
Loss = 7.4274e-02, PNorm = 64.5174, GNorm = 0.4986, lr_0 = 3.5919e-04
Loss = 6.9512e-02, PNorm = 64.5251, GNorm = 0.7030, lr_0 = 3.5895e-04
Loss = 7.1421e-02, PNorm = 64.5348, GNorm = 0.7319, lr_0 = 3.5870e-04
Loss = 8.0701e-02, PNorm = 64.5417, GNorm = 1.3472, lr_0 = 3.5845e-04
Loss = 7.2274e-02, PNorm = 64.5493, GNorm = 0.7764, lr_0 = 3.5821e-04
Loss = 6.7675e-02, PNorm = 64.5582, GNorm = 0.5918, lr_0 = 3.5796e-04
Loss = 7.4374e-02, PNorm = 64.5638, GNorm = 0.9442, lr_0 = 3.5772e-04
Loss = 5.7190e-02, PNorm = 64.5694, GNorm = 0.9218, lr_0 = 3.5747e-04
Loss = 6.8752e-02, PNorm = 64.5723, GNorm = 0.6843, lr_0 = 3.5723e-04
Loss = 7.4673e-02, PNorm = 64.5761, GNorm = 0.4944, lr_0 = 3.5698e-04
Loss = 6.0592e-02, PNorm = 64.5819, GNorm = 0.4716, lr_0 = 3.5674e-04
Loss = 7.6335e-02, PNorm = 64.5909, GNorm = 0.9071, lr_0 = 3.5650e-04
Loss = 7.1432e-02, PNorm = 64.5990, GNorm = 0.6354, lr_0 = 3.5625e-04
Loss = 7.7978e-02, PNorm = 64.6077, GNorm = 0.7579, lr_0 = 3.5601e-04
Loss = 6.3424e-02, PNorm = 64.6153, GNorm = 0.7377, lr_0 = 3.5576e-04
Loss = 6.9694e-02, PNorm = 64.6253, GNorm = 0.5391, lr_0 = 3.5552e-04
Loss = 6.5624e-02, PNorm = 64.6336, GNorm = 0.7891, lr_0 = 3.5528e-04
Loss = 7.3941e-02, PNorm = 64.6403, GNorm = 0.6989, lr_0 = 3.5503e-04
Loss = 6.8989e-02, PNorm = 64.6463, GNorm = 0.6623, lr_0 = 3.5479e-04
Loss = 6.9535e-02, PNorm = 64.6489, GNorm = 0.6336, lr_0 = 3.5455e-04
Loss = 6.4491e-02, PNorm = 64.6514, GNorm = 0.6472, lr_0 = 3.5430e-04
Loss = 7.3013e-02, PNorm = 64.6577, GNorm = 0.5153, lr_0 = 3.5406e-04
Loss = 6.9166e-02, PNorm = 64.6645, GNorm = 0.4875, lr_0 = 3.5382e-04
Loss = 7.1183e-02, PNorm = 64.6723, GNorm = 0.6594, lr_0 = 3.5358e-04
Loss = 6.6880e-02, PNorm = 64.6797, GNorm = 0.5889, lr_0 = 3.5333e-04
Loss = 7.9304e-02, PNorm = 64.6895, GNorm = 0.8074, lr_0 = 3.5309e-04
Loss = 7.8947e-02, PNorm = 64.6993, GNorm = 0.6226, lr_0 = 3.5285e-04
Loss = 7.7632e-02, PNorm = 64.7087, GNorm = 0.6833, lr_0 = 3.5261e-04
Loss = 7.0624e-02, PNorm = 64.7163, GNorm = 0.5777, lr_0 = 3.5237e-04
Loss = 7.4665e-02, PNorm = 64.7265, GNorm = 0.6220, lr_0 = 3.5212e-04
Loss = 6.6403e-02, PNorm = 64.7340, GNorm = 0.5588, lr_0 = 3.5188e-04
Loss = 6.5280e-02, PNorm = 64.7409, GNorm = 0.4403, lr_0 = 3.5164e-04
Loss = 6.6479e-02, PNorm = 64.7450, GNorm = 0.9058, lr_0 = 3.5140e-04
Loss = 8.2325e-02, PNorm = 64.7532, GNorm = 0.5651, lr_0 = 3.5116e-04
Loss = 7.7351e-02, PNorm = 64.7628, GNorm = 0.6746, lr_0 = 3.5092e-04
Loss = 7.0658e-02, PNorm = 64.7726, GNorm = 0.7144, lr_0 = 3.5068e-04
Loss = 8.6927e-02, PNorm = 64.7795, GNorm = 0.4304, lr_0 = 3.5044e-04
Loss = 6.6578e-02, PNorm = 64.7865, GNorm = 0.5803, lr_0 = 3.5020e-04
Loss = 7.2433e-02, PNorm = 64.7934, GNorm = 0.5275, lr_0 = 3.4996e-04
Loss = 8.0294e-02, PNorm = 64.8012, GNorm = 0.7080, lr_0 = 3.4972e-04
Loss = 6.6111e-02, PNorm = 64.8073, GNorm = 0.6092, lr_0 = 3.4948e-04
Loss = 6.7685e-02, PNorm = 64.8140, GNorm = 0.4131, lr_0 = 3.4924e-04
Loss = 7.0471e-02, PNorm = 64.8194, GNorm = 0.5937, lr_0 = 3.4900e-04
Loss = 6.1253e-02, PNorm = 64.8238, GNorm = 0.6880, lr_0 = 3.4876e-04
Loss = 7.5495e-02, PNorm = 64.8284, GNorm = 0.7021, lr_0 = 3.4852e-04
Loss = 7.6914e-02, PNorm = 64.8354, GNorm = 1.0815, lr_0 = 3.4828e-04
Loss = 6.6577e-02, PNorm = 64.8432, GNorm = 0.4815, lr_0 = 3.4805e-04
Loss = 6.1010e-02, PNorm = 64.8491, GNorm = 0.4438, lr_0 = 3.4781e-04
Loss = 6.5591e-02, PNorm = 64.8525, GNorm = 0.7200, lr_0 = 3.4757e-04
Loss = 6.9472e-02, PNorm = 64.8574, GNorm = 0.5422, lr_0 = 3.4733e-04
Loss = 7.2830e-02, PNorm = 64.8613, GNorm = 0.5180, lr_0 = 3.4709e-04
Loss = 7.0786e-02, PNorm = 64.8688, GNorm = 0.6313, lr_0 = 3.4686e-04
Loss = 6.9298e-02, PNorm = 64.8762, GNorm = 0.8615, lr_0 = 3.4662e-04
Loss = 6.8285e-02, PNorm = 64.8834, GNorm = 0.7385, lr_0 = 3.4638e-04
Loss = 6.9066e-02, PNorm = 64.8880, GNorm = 0.5258, lr_0 = 3.4614e-04
Loss = 5.8686e-02, PNorm = 64.8937, GNorm = 0.4104, lr_0 = 3.4591e-04
Loss = 6.8403e-02, PNorm = 64.8999, GNorm = 0.7572, lr_0 = 3.4567e-04
Loss = 7.5759e-02, PNorm = 64.9069, GNorm = 0.5768, lr_0 = 3.4543e-04
Loss = 7.8661e-02, PNorm = 64.9142, GNorm = 0.6424, lr_0 = 3.4520e-04
Loss = 7.4002e-02, PNorm = 64.9207, GNorm = 0.8397, lr_0 = 3.4496e-04
Loss = 6.9339e-02, PNorm = 64.9280, GNorm = 0.8114, lr_0 = 3.4472e-04
Loss = 7.9464e-02, PNorm = 64.9330, GNorm = 0.9298, lr_0 = 3.4449e-04
Loss = 7.3989e-02, PNorm = 64.9374, GNorm = 0.5150, lr_0 = 3.4425e-04
Loss = 7.0321e-02, PNorm = 64.9426, GNorm = 0.4330, lr_0 = 3.4402e-04
Loss = 6.7513e-02, PNorm = 64.9503, GNorm = 0.7783, lr_0 = 3.4378e-04
Loss = 7.8622e-02, PNorm = 64.9576, GNorm = 0.5016, lr_0 = 3.4354e-04
Loss = 8.0103e-02, PNorm = 64.9682, GNorm = 0.5449, lr_0 = 3.4331e-04
Validation mae = 0.392043
Epoch 15
Loss = 6.8809e-02, PNorm = 64.9754, GNorm = 0.7981, lr_0 = 3.4307e-04
Loss = 6.6879e-02, PNorm = 64.9826, GNorm = 0.7639, lr_0 = 3.4284e-04
Loss = 7.2640e-02, PNorm = 64.9894, GNorm = 0.7816, lr_0 = 3.4260e-04
Loss = 7.0033e-02, PNorm = 64.9974, GNorm = 0.5643, lr_0 = 3.4237e-04
Loss = 5.9569e-02, PNorm = 65.0026, GNorm = 0.5410, lr_0 = 3.4213e-04
Loss = 6.0666e-02, PNorm = 65.0084, GNorm = 0.6453, lr_0 = 3.4190e-04
Loss = 6.2991e-02, PNorm = 65.0133, GNorm = 0.4497, lr_0 = 3.4167e-04
Loss = 6.2988e-02, PNorm = 65.0216, GNorm = 0.4604, lr_0 = 3.4143e-04
Loss = 5.8403e-02, PNorm = 65.0296, GNorm = 0.4635, lr_0 = 3.4120e-04
Loss = 5.5495e-02, PNorm = 65.0350, GNorm = 1.0358, lr_0 = 3.4096e-04
Loss = 6.5065e-02, PNorm = 65.0414, GNorm = 0.7143, lr_0 = 3.4073e-04
Loss = 6.8026e-02, PNorm = 65.0481, GNorm = 1.0024, lr_0 = 3.4050e-04
Loss = 6.0745e-02, PNorm = 65.0570, GNorm = 0.4894, lr_0 = 3.4026e-04
Loss = 6.4954e-02, PNorm = 65.0632, GNorm = 0.7952, lr_0 = 3.4003e-04
Loss = 6.2533e-02, PNorm = 65.0682, GNorm = 0.5325, lr_0 = 3.3980e-04
Loss = 6.5710e-02, PNorm = 65.0772, GNorm = 0.6707, lr_0 = 3.3956e-04
Loss = 6.3814e-02, PNorm = 65.0843, GNorm = 0.5306, lr_0 = 3.3933e-04
Loss = 6.3013e-02, PNorm = 65.0944, GNorm = 0.8487, lr_0 = 3.3910e-04
Loss = 6.8010e-02, PNorm = 65.1012, GNorm = 0.7438, lr_0 = 3.3887e-04
Loss = 7.2598e-02, PNorm = 65.1092, GNorm = 0.9312, lr_0 = 3.3864e-04
Loss = 6.6770e-02, PNorm = 65.1144, GNorm = 0.4303, lr_0 = 3.3840e-04
Loss = 7.0909e-02, PNorm = 65.1193, GNorm = 0.7935, lr_0 = 3.3817e-04
Loss = 5.6807e-02, PNorm = 65.1254, GNorm = 0.4181, lr_0 = 3.3794e-04
Loss = 6.6713e-02, PNorm = 65.1333, GNorm = 0.5413, lr_0 = 3.3771e-04
Loss = 5.2778e-02, PNorm = 65.1389, GNorm = 0.7370, lr_0 = 3.3748e-04
Loss = 6.1685e-02, PNorm = 65.1470, GNorm = 0.8501, lr_0 = 3.3725e-04
Loss = 6.9187e-02, PNorm = 65.1546, GNorm = 0.6120, lr_0 = 3.3701e-04
Loss = 6.5242e-02, PNorm = 65.1624, GNorm = 0.5257, lr_0 = 3.3678e-04
Loss = 6.2345e-02, PNorm = 65.1703, GNorm = 0.5298, lr_0 = 3.3655e-04
Loss = 6.1068e-02, PNorm = 65.1752, GNorm = 0.7953, lr_0 = 3.3632e-04
Loss = 5.7103e-02, PNorm = 65.1801, GNorm = 0.5949, lr_0 = 3.3609e-04
Loss = 6.0951e-02, PNorm = 65.1855, GNorm = 0.4295, lr_0 = 3.3586e-04
Loss = 6.8523e-02, PNorm = 65.1936, GNorm = 0.6188, lr_0 = 3.3563e-04
Loss = 7.9525e-02, PNorm = 65.2018, GNorm = 0.6306, lr_0 = 3.3540e-04
Loss = 6.3681e-02, PNorm = 65.2117, GNorm = 0.5246, lr_0 = 3.3517e-04
Loss = 6.1710e-02, PNorm = 65.2171, GNorm = 0.7558, lr_0 = 3.3494e-04
Loss = 7.6334e-02, PNorm = 65.2220, GNorm = 0.5863, lr_0 = 3.3471e-04
Loss = 7.2410e-02, PNorm = 65.2309, GNorm = 0.6426, lr_0 = 3.3448e-04
Loss = 6.4646e-02, PNorm = 65.2414, GNorm = 0.5807, lr_0 = 3.3425e-04
Loss = 6.8304e-02, PNorm = 65.2494, GNorm = 0.4875, lr_0 = 3.3403e-04
Loss = 7.3452e-02, PNorm = 65.2550, GNorm = 0.7084, lr_0 = 3.3380e-04
Loss = 6.9476e-02, PNorm = 65.2642, GNorm = 0.5590, lr_0 = 3.3357e-04
Loss = 6.7881e-02, PNorm = 65.2746, GNorm = 0.6288, lr_0 = 3.3334e-04
Loss = 6.4181e-02, PNorm = 65.2848, GNorm = 0.6049, lr_0 = 3.3311e-04
Loss = 6.7777e-02, PNorm = 65.2940, GNorm = 0.6134, lr_0 = 3.3288e-04
Loss = 6.4756e-02, PNorm = 65.2998, GNorm = 0.5454, lr_0 = 3.3265e-04
Loss = 7.4181e-02, PNorm = 65.3068, GNorm = 0.4671, lr_0 = 3.3243e-04
Loss = 6.3291e-02, PNorm = 65.3132, GNorm = 0.7382, lr_0 = 3.3220e-04
Loss = 6.4733e-02, PNorm = 65.3197, GNorm = 0.4567, lr_0 = 3.3197e-04
Loss = 7.0063e-02, PNorm = 65.3246, GNorm = 0.4679, lr_0 = 3.3174e-04
Loss = 6.0213e-02, PNorm = 65.3311, GNorm = 0.7982, lr_0 = 3.3152e-04
Loss = 6.7766e-02, PNorm = 65.3382, GNorm = 0.4808, lr_0 = 3.3129e-04
Loss = 7.1920e-02, PNorm = 65.3427, GNorm = 0.5705, lr_0 = 3.3106e-04
Loss = 6.1920e-02, PNorm = 65.3497, GNorm = 0.7087, lr_0 = 3.3084e-04
Loss = 6.6674e-02, PNorm = 65.3580, GNorm = 0.7767, lr_0 = 3.3061e-04
Loss = 6.8419e-02, PNorm = 65.3685, GNorm = 0.6673, lr_0 = 3.3038e-04
Loss = 8.1511e-02, PNorm = 65.3755, GNorm = 0.7757, lr_0 = 3.3016e-04
Loss = 6.9121e-02, PNorm = 65.3826, GNorm = 0.3921, lr_0 = 3.2993e-04
Loss = 6.2301e-02, PNorm = 65.3890, GNorm = 0.4499, lr_0 = 3.2970e-04
Loss = 6.4668e-02, PNorm = 65.3957, GNorm = 0.4687, lr_0 = 3.2948e-04
Loss = 6.6784e-02, PNorm = 65.4005, GNorm = 0.9686, lr_0 = 3.2925e-04
Loss = 6.0991e-02, PNorm = 65.4058, GNorm = 1.1504, lr_0 = 3.2903e-04
Loss = 5.2393e-02, PNorm = 65.4101, GNorm = 0.4335, lr_0 = 3.2880e-04
Loss = 7.2314e-02, PNorm = 65.4139, GNorm = 0.7603, lr_0 = 3.2858e-04
Loss = 6.6318e-02, PNorm = 65.4201, GNorm = 0.4434, lr_0 = 3.2835e-04
Loss = 7.1961e-02, PNorm = 65.4278, GNorm = 0.5068, lr_0 = 3.2813e-04
Loss = 7.4047e-02, PNorm = 65.4363, GNorm = 0.9670, lr_0 = 3.2790e-04
Loss = 6.8875e-02, PNorm = 65.4437, GNorm = 0.5897, lr_0 = 3.2768e-04
Loss = 6.9443e-02, PNorm = 65.4510, GNorm = 0.5854, lr_0 = 3.2745e-04
Loss = 5.9100e-02, PNorm = 65.4543, GNorm = 0.4679, lr_0 = 3.2723e-04
Loss = 6.8966e-02, PNorm = 65.4584, GNorm = 0.7547, lr_0 = 3.2700e-04
Loss = 6.9715e-02, PNorm = 65.4652, GNorm = 0.4779, lr_0 = 3.2678e-04
Loss = 6.5978e-02, PNorm = 65.4723, GNorm = 0.4998, lr_0 = 3.2656e-04
Loss = 7.6122e-02, PNorm = 65.4776, GNorm = 0.4769, lr_0 = 3.2633e-04
Loss = 8.0125e-02, PNorm = 65.4835, GNorm = 0.7647, lr_0 = 3.2611e-04
Loss = 6.7193e-02, PNorm = 65.4907, GNorm = 0.6264, lr_0 = 3.2589e-04
Loss = 6.2324e-02, PNorm = 65.4980, GNorm = 0.4652, lr_0 = 3.2566e-04
Loss = 8.1771e-02, PNorm = 65.5051, GNorm = 0.5312, lr_0 = 3.2544e-04
Loss = 6.3649e-02, PNorm = 65.5085, GNorm = 0.5661, lr_0 = 3.2522e-04
Loss = 6.1071e-02, PNorm = 65.5151, GNorm = 0.4964, lr_0 = 3.2499e-04
Loss = 5.7275e-02, PNorm = 65.5212, GNorm = 0.4787, lr_0 = 3.2477e-04
Loss = 5.5883e-02, PNorm = 65.5251, GNorm = 0.4092, lr_0 = 3.2455e-04
Loss = 5.9470e-02, PNorm = 65.5296, GNorm = 0.4583, lr_0 = 3.2433e-04
Loss = 6.9531e-02, PNorm = 65.5340, GNorm = 0.9601, lr_0 = 3.2410e-04
Loss = 6.8170e-02, PNorm = 65.5398, GNorm = 0.7605, lr_0 = 3.2388e-04
Loss = 8.0621e-02, PNorm = 65.5458, GNorm = 0.7617, lr_0 = 3.2366e-04
Loss = 6.1892e-02, PNorm = 65.5493, GNorm = 0.5252, lr_0 = 3.2344e-04
Loss = 6.4835e-02, PNorm = 65.5518, GNorm = 0.5947, lr_0 = 3.2322e-04
Loss = 7.2423e-02, PNorm = 65.5543, GNorm = 1.1627, lr_0 = 3.2300e-04
Loss = 6.9016e-02, PNorm = 65.5582, GNorm = 0.8310, lr_0 = 3.2277e-04
Loss = 5.9975e-02, PNorm = 65.5659, GNorm = 0.6154, lr_0 = 3.2255e-04
Loss = 5.9805e-02, PNorm = 65.5751, GNorm = 0.5859, lr_0 = 3.2233e-04
Loss = 6.3211e-02, PNorm = 65.5807, GNorm = 0.5427, lr_0 = 3.2211e-04
Loss = 6.5082e-02, PNorm = 65.5851, GNorm = 0.5953, lr_0 = 3.2189e-04
Loss = 6.4287e-02, PNorm = 65.5896, GNorm = 0.6455, lr_0 = 3.2167e-04
Loss = 7.3989e-02, PNorm = 65.5967, GNorm = 0.6203, lr_0 = 3.2145e-04
Loss = 6.7717e-02, PNorm = 65.6031, GNorm = 1.0886, lr_0 = 3.2123e-04
Loss = 6.5306e-02, PNorm = 65.6075, GNorm = 0.8039, lr_0 = 3.2101e-04
Loss = 6.5289e-02, PNorm = 65.6171, GNorm = 0.4745, lr_0 = 3.2079e-04
Loss = 6.2773e-02, PNorm = 65.6245, GNorm = 0.6098, lr_0 = 3.2057e-04
Loss = 6.1147e-02, PNorm = 65.6291, GNorm = 0.4082, lr_0 = 3.2035e-04
Loss = 6.5771e-02, PNorm = 65.6330, GNorm = 0.7066, lr_0 = 3.2013e-04
Loss = 6.1572e-02, PNorm = 65.6362, GNorm = 0.4554, lr_0 = 3.1991e-04
Loss = 6.5590e-02, PNorm = 65.6408, GNorm = 0.4723, lr_0 = 3.1969e-04
Loss = 6.1123e-02, PNorm = 65.6492, GNorm = 0.5973, lr_0 = 3.1947e-04
Loss = 6.9644e-02, PNorm = 65.6570, GNorm = 0.4897, lr_0 = 3.1925e-04
Loss = 5.8128e-02, PNorm = 65.6616, GNorm = 0.4886, lr_0 = 3.1904e-04
Loss = 6.9270e-02, PNorm = 65.6685, GNorm = 0.7604, lr_0 = 3.1882e-04
Loss = 7.1279e-02, PNorm = 65.6756, GNorm = 0.5524, lr_0 = 3.1860e-04
Loss = 5.9461e-02, PNorm = 65.6818, GNorm = 0.4253, lr_0 = 3.1838e-04
Loss = 6.1116e-02, PNorm = 65.6857, GNorm = 0.4242, lr_0 = 3.1816e-04
Loss = 6.3525e-02, PNorm = 65.6903, GNorm = 0.4262, lr_0 = 3.1794e-04
Loss = 5.9218e-02, PNorm = 65.6962, GNorm = 0.5549, lr_0 = 3.1773e-04
Loss = 6.6882e-02, PNorm = 65.7036, GNorm = 0.9283, lr_0 = 3.1751e-04
Loss = 7.6107e-02, PNorm = 65.7092, GNorm = 0.6118, lr_0 = 3.1729e-04
Loss = 6.5946e-02, PNorm = 65.7146, GNorm = 0.6647, lr_0 = 3.1707e-04
Loss = 6.7536e-02, PNorm = 65.7213, GNorm = 0.6901, lr_0 = 3.1686e-04
Loss = 6.4730e-02, PNorm = 65.7280, GNorm = 0.6991, lr_0 = 3.1664e-04
Loss = 6.3175e-02, PNorm = 65.7336, GNorm = 0.4396, lr_0 = 3.1642e-04
Loss = 6.3535e-02, PNorm = 65.7375, GNorm = 0.6322, lr_0 = 3.1621e-04
Validation mae = 0.388783
Epoch 16
Loss = 5.7991e-02, PNorm = 65.7430, GNorm = 0.4761, lr_0 = 3.1599e-04
Loss = 7.0459e-02, PNorm = 65.7516, GNorm = 0.8264, lr_0 = 3.1577e-04
Loss = 5.6830e-02, PNorm = 65.7589, GNorm = 0.8140, lr_0 = 3.1556e-04
Loss = 6.6568e-02, PNorm = 65.7640, GNorm = 0.8241, lr_0 = 3.1534e-04
Loss = 5.7414e-02, PNorm = 65.7707, GNorm = 0.4466, lr_0 = 3.1512e-04
Loss = 5.9632e-02, PNorm = 65.7763, GNorm = 0.4681, lr_0 = 3.1491e-04
Loss = 5.8108e-02, PNorm = 65.7844, GNorm = 0.5687, lr_0 = 3.1469e-04
Loss = 5.1844e-02, PNorm = 65.7926, GNorm = 0.6455, lr_0 = 3.1448e-04
Loss = 6.0190e-02, PNorm = 65.7994, GNorm = 0.6402, lr_0 = 3.1426e-04
Loss = 6.3623e-02, PNorm = 65.8085, GNorm = 0.5309, lr_0 = 3.1405e-04
Loss = 5.2909e-02, PNorm = 65.8168, GNorm = 0.4989, lr_0 = 3.1383e-04
Loss = 6.7162e-02, PNorm = 65.8246, GNorm = 0.6007, lr_0 = 3.1362e-04
Loss = 6.3973e-02, PNorm = 65.8309, GNorm = 0.7954, lr_0 = 3.1340e-04
Loss = 5.4361e-02, PNorm = 65.8395, GNorm = 0.7260, lr_0 = 3.1319e-04
Loss = 5.8624e-02, PNorm = 65.8479, GNorm = 0.6087, lr_0 = 3.1297e-04
Loss = 6.3792e-02, PNorm = 65.8531, GNorm = 0.7519, lr_0 = 3.1276e-04
Loss = 5.1090e-02, PNorm = 65.8580, GNorm = 0.7140, lr_0 = 3.1254e-04
Loss = 6.1789e-02, PNorm = 65.8642, GNorm = 0.6183, lr_0 = 3.1233e-04
Loss = 5.3731e-02, PNorm = 65.8697, GNorm = 0.6733, lr_0 = 3.1212e-04
Loss = 5.8187e-02, PNorm = 65.8732, GNorm = 0.4439, lr_0 = 3.1190e-04
Loss = 5.2577e-02, PNorm = 65.8784, GNorm = 0.4405, lr_0 = 3.1169e-04
Loss = 6.3299e-02, PNorm = 65.8855, GNorm = 0.5224, lr_0 = 3.1147e-04
Loss = 5.3298e-02, PNorm = 65.8951, GNorm = 0.5471, lr_0 = 3.1126e-04
Loss = 6.2014e-02, PNorm = 65.9036, GNorm = 0.4089, lr_0 = 3.1105e-04
Loss = 6.9296e-02, PNorm = 65.9121, GNorm = 0.5560, lr_0 = 3.1083e-04
Loss = 6.2496e-02, PNorm = 65.9185, GNorm = 0.5248, lr_0 = 3.1062e-04
Loss = 5.9115e-02, PNorm = 65.9248, GNorm = 0.7031, lr_0 = 3.1041e-04
Loss = 6.4288e-02, PNorm = 65.9296, GNorm = 0.6526, lr_0 = 3.1020e-04
Loss = 6.7086e-02, PNorm = 65.9348, GNorm = 0.6029, lr_0 = 3.0998e-04
Loss = 6.1805e-02, PNorm = 65.9395, GNorm = 0.4651, lr_0 = 3.0977e-04
Loss = 5.7486e-02, PNorm = 65.9435, GNorm = 0.5988, lr_0 = 3.0956e-04
Loss = 6.5973e-02, PNorm = 65.9481, GNorm = 0.4846, lr_0 = 3.0935e-04
Loss = 5.2354e-02, PNorm = 65.9547, GNorm = 0.3862, lr_0 = 3.0914e-04
Loss = 5.7223e-02, PNorm = 65.9598, GNorm = 0.4560, lr_0 = 3.0892e-04
Loss = 6.3696e-02, PNorm = 65.9665, GNorm = 0.5572, lr_0 = 3.0871e-04
Loss = 5.4401e-02, PNorm = 65.9715, GNorm = 0.5166, lr_0 = 3.0850e-04
Loss = 5.5193e-02, PNorm = 65.9772, GNorm = 0.5204, lr_0 = 3.0829e-04
Loss = 5.4269e-02, PNorm = 65.9825, GNorm = 0.5516, lr_0 = 3.0808e-04
Loss = 7.0685e-02, PNorm = 65.9854, GNorm = 0.6600, lr_0 = 3.0787e-04
Loss = 6.6783e-02, PNorm = 65.9923, GNorm = 0.5591, lr_0 = 3.0766e-04
Loss = 5.1620e-02, PNorm = 65.9998, GNorm = 0.4911, lr_0 = 3.0745e-04
Loss = 5.5097e-02, PNorm = 66.0065, GNorm = 0.5774, lr_0 = 3.0723e-04
Loss = 5.3128e-02, PNorm = 66.0123, GNorm = 0.5429, lr_0 = 3.0702e-04
Loss = 5.9529e-02, PNorm = 66.0200, GNorm = 0.4450, lr_0 = 3.0681e-04
Loss = 5.7327e-02, PNorm = 66.0265, GNorm = 0.7246, lr_0 = 3.0660e-04
Loss = 6.4270e-02, PNorm = 66.0320, GNorm = 0.6627, lr_0 = 3.0639e-04
Loss = 6.0955e-02, PNorm = 66.0372, GNorm = 0.5459, lr_0 = 3.0618e-04
Loss = 5.2961e-02, PNorm = 66.0425, GNorm = 0.6701, lr_0 = 3.0597e-04
Loss = 6.0299e-02, PNorm = 66.0496, GNorm = 0.4444, lr_0 = 3.0576e-04
Loss = 6.0034e-02, PNorm = 66.0556, GNorm = 0.6923, lr_0 = 3.0555e-04
Loss = 6.7014e-02, PNorm = 66.0613, GNorm = 0.6062, lr_0 = 3.0535e-04
Loss = 5.6189e-02, PNorm = 66.0669, GNorm = 0.7133, lr_0 = 3.0514e-04
Loss = 5.7002e-02, PNorm = 66.0703, GNorm = 0.5861, lr_0 = 3.0493e-04
Loss = 6.1947e-02, PNorm = 66.0724, GNorm = 1.0770, lr_0 = 3.0472e-04
Loss = 6.2425e-02, PNorm = 66.0766, GNorm = 0.5833, lr_0 = 3.0451e-04
Loss = 6.0443e-02, PNorm = 66.0829, GNorm = 0.7302, lr_0 = 3.0430e-04
Loss = 5.9109e-02, PNorm = 66.0889, GNorm = 0.4924, lr_0 = 3.0409e-04
Loss = 6.4322e-02, PNorm = 66.0962, GNorm = 0.6693, lr_0 = 3.0388e-04
Loss = 5.8425e-02, PNorm = 66.1037, GNorm = 0.5224, lr_0 = 3.0368e-04
Loss = 6.5322e-02, PNorm = 66.1098, GNorm = 0.6295, lr_0 = 3.0347e-04
Loss = 7.0353e-02, PNorm = 66.1169, GNorm = 0.5855, lr_0 = 3.0326e-04
Loss = 5.8817e-02, PNorm = 66.1231, GNorm = 0.6513, lr_0 = 3.0305e-04
Loss = 5.1094e-02, PNorm = 66.1277, GNorm = 0.5297, lr_0 = 3.0284e-04
Loss = 6.6004e-02, PNorm = 66.1322, GNorm = 2.0152, lr_0 = 3.0264e-04
Loss = 6.2256e-02, PNorm = 66.1365, GNorm = 0.6311, lr_0 = 3.0243e-04
Loss = 6.4540e-02, PNorm = 66.1421, GNorm = 0.6173, lr_0 = 3.0222e-04
Loss = 6.9522e-02, PNorm = 66.1470, GNorm = 0.5290, lr_0 = 3.0202e-04
Loss = 6.1947e-02, PNorm = 66.1500, GNorm = 0.7461, lr_0 = 3.0181e-04
Loss = 6.3019e-02, PNorm = 66.1540, GNorm = 0.4982, lr_0 = 3.0160e-04
Loss = 5.6480e-02, PNorm = 66.1595, GNorm = 0.6536, lr_0 = 3.0140e-04
Loss = 6.7118e-02, PNorm = 66.1674, GNorm = 0.8856, lr_0 = 3.0119e-04
Loss = 6.0046e-02, PNorm = 66.1756, GNorm = 0.4328, lr_0 = 3.0098e-04
Loss = 7.8244e-02, PNorm = 66.1823, GNorm = 0.9247, lr_0 = 3.0078e-04
Loss = 6.0886e-02, PNorm = 66.1887, GNorm = 0.8279, lr_0 = 3.0057e-04
Loss = 6.6918e-02, PNorm = 66.1941, GNorm = 1.1269, lr_0 = 3.0036e-04
Loss = 6.8629e-02, PNorm = 66.2001, GNorm = 1.0236, lr_0 = 3.0016e-04
Loss = 5.6757e-02, PNorm = 66.2063, GNorm = 0.5140, lr_0 = 2.9995e-04
Loss = 6.3333e-02, PNorm = 66.2121, GNorm = 0.5649, lr_0 = 2.9975e-04
Loss = 6.1854e-02, PNorm = 66.2187, GNorm = 0.5920, lr_0 = 2.9954e-04
Loss = 6.8324e-02, PNorm = 66.2236, GNorm = 0.6978, lr_0 = 2.9934e-04
Loss = 6.1826e-02, PNorm = 66.2280, GNorm = 0.5640, lr_0 = 2.9913e-04
Loss = 5.3531e-02, PNorm = 66.2324, GNorm = 0.5238, lr_0 = 2.9893e-04
Loss = 5.3811e-02, PNorm = 66.2378, GNorm = 0.7017, lr_0 = 2.9872e-04
Loss = 6.3872e-02, PNorm = 66.2446, GNorm = 0.6675, lr_0 = 2.9852e-04
Loss = 6.0746e-02, PNorm = 66.2492, GNorm = 0.4961, lr_0 = 2.9831e-04
Loss = 5.8175e-02, PNorm = 66.2536, GNorm = 0.5116, lr_0 = 2.9811e-04
Loss = 7.0923e-02, PNorm = 66.2589, GNorm = 0.7354, lr_0 = 2.9790e-04
Loss = 6.7533e-02, PNorm = 66.2657, GNorm = 0.5787, lr_0 = 2.9770e-04
Loss = 6.7372e-02, PNorm = 66.2710, GNorm = 0.5020, lr_0 = 2.9750e-04
Loss = 6.7217e-02, PNorm = 66.2778, GNorm = 0.4348, lr_0 = 2.9729e-04
Loss = 5.7726e-02, PNorm = 66.2866, GNorm = 0.5231, lr_0 = 2.9709e-04
Loss = 6.5662e-02, PNorm = 66.2931, GNorm = 0.6940, lr_0 = 2.9689e-04
Loss = 6.4339e-02, PNorm = 66.2945, GNorm = 0.4125, lr_0 = 2.9668e-04
Loss = 6.1734e-02, PNorm = 66.2976, GNorm = 0.4828, lr_0 = 2.9648e-04
Loss = 6.9106e-02, PNorm = 66.3024, GNorm = 0.5896, lr_0 = 2.9628e-04
Loss = 5.8868e-02, PNorm = 66.3068, GNorm = 0.4988, lr_0 = 2.9607e-04
Loss = 5.9568e-02, PNorm = 66.3131, GNorm = 0.5066, lr_0 = 2.9587e-04
Loss = 6.8383e-02, PNorm = 66.3187, GNorm = 0.8158, lr_0 = 2.9567e-04
Loss = 6.0698e-02, PNorm = 66.3238, GNorm = 0.8568, lr_0 = 2.9546e-04
Loss = 6.0231e-02, PNorm = 66.3300, GNorm = 0.6516, lr_0 = 2.9526e-04
Loss = 6.3732e-02, PNorm = 66.3359, GNorm = 0.7440, lr_0 = 2.9506e-04
Loss = 5.2336e-02, PNorm = 66.3416, GNorm = 0.5273, lr_0 = 2.9486e-04
Loss = 6.0587e-02, PNorm = 66.3479, GNorm = 0.5594, lr_0 = 2.9466e-04
Loss = 6.9496e-02, PNorm = 66.3551, GNorm = 0.5806, lr_0 = 2.9445e-04
Loss = 6.5227e-02, PNorm = 66.3602, GNorm = 0.4684, lr_0 = 2.9425e-04
Loss = 6.7441e-02, PNorm = 66.3627, GNorm = 0.5563, lr_0 = 2.9405e-04
Loss = 6.5058e-02, PNorm = 66.3682, GNorm = 0.8167, lr_0 = 2.9385e-04
Loss = 5.8249e-02, PNorm = 66.3743, GNorm = 0.4023, lr_0 = 2.9365e-04
Loss = 5.7709e-02, PNorm = 66.3799, GNorm = 0.6675, lr_0 = 2.9345e-04
Loss = 6.9161e-02, PNorm = 66.3864, GNorm = 0.6647, lr_0 = 2.9325e-04
Loss = 7.3058e-02, PNorm = 66.3937, GNorm = 0.7502, lr_0 = 2.9305e-04
Loss = 7.7501e-02, PNorm = 66.3988, GNorm = 0.4572, lr_0 = 2.9284e-04
Loss = 5.8867e-02, PNorm = 66.4030, GNorm = 0.7177, lr_0 = 2.9264e-04
Loss = 5.4915e-02, PNorm = 66.4067, GNorm = 0.4521, lr_0 = 2.9244e-04
Loss = 6.8908e-02, PNorm = 66.4099, GNorm = 0.5648, lr_0 = 2.9224e-04
Loss = 6.1786e-02, PNorm = 66.4144, GNorm = 0.4733, lr_0 = 2.9204e-04
Loss = 7.5800e-02, PNorm = 66.4208, GNorm = 0.4228, lr_0 = 2.9184e-04
Loss = 6.5454e-02, PNorm = 66.4274, GNorm = 0.6361, lr_0 = 2.9164e-04
Loss = 6.2330e-02, PNorm = 66.4322, GNorm = 0.7223, lr_0 = 2.9144e-04
Loss = 6.7473e-02, PNorm = 66.4398, GNorm = 0.4960, lr_0 = 2.9124e-04
Validation mae = 0.385736
Epoch 17
Loss = 5.0036e-02, PNorm = 66.4462, GNorm = 0.5397, lr_0 = 2.9104e-04
Loss = 5.1853e-02, PNorm = 66.4534, GNorm = 0.7365, lr_0 = 2.9084e-04
Loss = 5.4613e-02, PNorm = 66.4584, GNorm = 0.4531, lr_0 = 2.9065e-04
Loss = 5.0212e-02, PNorm = 66.4622, GNorm = 0.6468, lr_0 = 2.9045e-04
Loss = 5.4566e-02, PNorm = 66.4670, GNorm = 0.6342, lr_0 = 2.9025e-04
Loss = 5.2243e-02, PNorm = 66.4709, GNorm = 0.8734, lr_0 = 2.9005e-04
Loss = 4.6862e-02, PNorm = 66.4767, GNorm = 0.7932, lr_0 = 2.8985e-04
Loss = 5.9633e-02, PNorm = 66.4810, GNorm = 0.5238, lr_0 = 2.8965e-04
Loss = 6.6583e-02, PNorm = 66.4862, GNorm = 0.9834, lr_0 = 2.8945e-04
Loss = 6.1125e-02, PNorm = 66.4937, GNorm = 0.6776, lr_0 = 2.8925e-04
Loss = 6.0662e-02, PNorm = 66.5009, GNorm = 0.5359, lr_0 = 2.8906e-04
Loss = 5.1017e-02, PNorm = 66.5076, GNorm = 0.4402, lr_0 = 2.8886e-04
Loss = 5.4294e-02, PNorm = 66.5138, GNorm = 0.5058, lr_0 = 2.8866e-04
Loss = 5.2530e-02, PNorm = 66.5202, GNorm = 0.5144, lr_0 = 2.8846e-04
Loss = 4.4748e-02, PNorm = 66.5243, GNorm = 0.4431, lr_0 = 2.8826e-04
Loss = 4.9639e-02, PNorm = 66.5282, GNorm = 0.4807, lr_0 = 2.8807e-04
Loss = 4.6093e-02, PNorm = 66.5338, GNorm = 0.6013, lr_0 = 2.8787e-04
Loss = 4.8338e-02, PNorm = 66.5404, GNorm = 0.5030, lr_0 = 2.8767e-04
Loss = 5.2502e-02, PNorm = 66.5452, GNorm = 0.6094, lr_0 = 2.8748e-04
Loss = 5.6114e-02, PNorm = 66.5513, GNorm = 0.5412, lr_0 = 2.8728e-04
Loss = 7.1858e-02, PNorm = 66.5568, GNorm = 0.4975, lr_0 = 2.8708e-04
Loss = 5.5470e-02, PNorm = 66.5629, GNorm = 0.2930, lr_0 = 2.8689e-04
Loss = 5.2778e-02, PNorm = 66.5683, GNorm = 0.6471, lr_0 = 2.8669e-04
Loss = 5.2924e-02, PNorm = 66.5728, GNorm = 0.9570, lr_0 = 2.8649e-04
Loss = 4.9654e-02, PNorm = 66.5787, GNorm = 0.4991, lr_0 = 2.8630e-04
Loss = 5.9506e-02, PNorm = 66.5832, GNorm = 0.5422, lr_0 = 2.8610e-04
Loss = 5.6606e-02, PNorm = 66.5899, GNorm = 0.7337, lr_0 = 2.8590e-04
Loss = 5.9390e-02, PNorm = 66.5976, GNorm = 0.9475, lr_0 = 2.8571e-04
Loss = 5.4391e-02, PNorm = 66.6030, GNorm = 0.4910, lr_0 = 2.8551e-04
Loss = 5.0432e-02, PNorm = 66.6079, GNorm = 0.3458, lr_0 = 2.8532e-04
Loss = 5.3971e-02, PNorm = 66.6132, GNorm = 0.3725, lr_0 = 2.8512e-04
Loss = 5.6487e-02, PNorm = 66.6176, GNorm = 0.6499, lr_0 = 2.8493e-04
Loss = 6.8607e-02, PNorm = 66.6218, GNorm = 0.8368, lr_0 = 2.8473e-04
Loss = 6.2174e-02, PNorm = 66.6288, GNorm = 0.9006, lr_0 = 2.8454e-04
Loss = 5.9438e-02, PNorm = 66.6360, GNorm = 0.5407, lr_0 = 2.8434e-04
Loss = 6.2734e-02, PNorm = 66.6420, GNorm = 0.4391, lr_0 = 2.8415e-04
Loss = 4.8854e-02, PNorm = 66.6485, GNorm = 0.7022, lr_0 = 2.8395e-04
Loss = 5.1103e-02, PNorm = 66.6522, GNorm = 0.8447, lr_0 = 2.8376e-04
Loss = 6.8252e-02, PNorm = 66.6556, GNorm = 0.5087, lr_0 = 2.8356e-04
Loss = 6.3202e-02, PNorm = 66.6621, GNorm = 0.5066, lr_0 = 2.8337e-04
Loss = 6.0461e-02, PNorm = 66.6686, GNorm = 0.4739, lr_0 = 2.8317e-04
Loss = 5.4174e-02, PNorm = 66.6747, GNorm = 0.6790, lr_0 = 2.8298e-04
Loss = 5.8701e-02, PNorm = 66.6804, GNorm = 0.5171, lr_0 = 2.8279e-04
Loss = 5.6287e-02, PNorm = 66.6854, GNorm = 0.6426, lr_0 = 2.8259e-04
Loss = 5.1686e-02, PNorm = 66.6889, GNorm = 0.8670, lr_0 = 2.8240e-04
Loss = 5.5447e-02, PNorm = 66.6943, GNorm = 0.6014, lr_0 = 2.8221e-04
Loss = 5.6525e-02, PNorm = 66.7016, GNorm = 0.4233, lr_0 = 2.8201e-04
Loss = 5.6748e-02, PNorm = 66.7063, GNorm = 0.4956, lr_0 = 2.8182e-04
Loss = 5.3069e-02, PNorm = 66.7106, GNorm = 0.9047, lr_0 = 2.8163e-04
Loss = 6.3139e-02, PNorm = 66.7171, GNorm = 0.4838, lr_0 = 2.8143e-04
Loss = 5.3963e-02, PNorm = 66.7206, GNorm = 0.4252, lr_0 = 2.8124e-04
Loss = 6.8690e-02, PNorm = 66.7243, GNorm = 0.6180, lr_0 = 2.8105e-04
Loss = 5.5048e-02, PNorm = 66.7304, GNorm = 0.9166, lr_0 = 2.8085e-04
Loss = 7.2166e-02, PNorm = 66.7377, GNorm = 0.7105, lr_0 = 2.8066e-04
Loss = 5.9658e-02, PNorm = 66.7421, GNorm = 0.5405, lr_0 = 2.8047e-04
Loss = 5.4441e-02, PNorm = 66.7457, GNorm = 0.6891, lr_0 = 2.8028e-04
Loss = 5.7280e-02, PNorm = 66.7514, GNorm = 0.5039, lr_0 = 2.8009e-04
Loss = 5.6894e-02, PNorm = 66.7576, GNorm = 0.7438, lr_0 = 2.7989e-04
Loss = 5.2020e-02, PNorm = 66.7628, GNorm = 0.4773, lr_0 = 2.7970e-04
Loss = 5.8161e-02, PNorm = 66.7676, GNorm = 0.6116, lr_0 = 2.7951e-04
Loss = 6.4254e-02, PNorm = 66.7741, GNorm = 0.6696, lr_0 = 2.7932e-04
Loss = 6.2918e-02, PNorm = 66.7786, GNorm = 0.4882, lr_0 = 2.7913e-04
Loss = 5.9174e-02, PNorm = 66.7832, GNorm = 0.6844, lr_0 = 2.7894e-04
Loss = 5.7257e-02, PNorm = 66.7907, GNorm = 0.4521, lr_0 = 2.7875e-04
Loss = 5.8410e-02, PNorm = 66.7995, GNorm = 0.5594, lr_0 = 2.7855e-04
Loss = 5.9941e-02, PNorm = 66.8064, GNorm = 0.5307, lr_0 = 2.7836e-04
Loss = 5.0854e-02, PNorm = 66.8115, GNorm = 0.5652, lr_0 = 2.7817e-04
Loss = 5.7330e-02, PNorm = 66.8156, GNorm = 0.4552, lr_0 = 2.7798e-04
Loss = 5.4072e-02, PNorm = 66.8193, GNorm = 0.4851, lr_0 = 2.7779e-04
Loss = 5.3506e-02, PNorm = 66.8248, GNorm = 0.8892, lr_0 = 2.7760e-04
Loss = 5.1260e-02, PNorm = 66.8311, GNorm = 0.5265, lr_0 = 2.7741e-04
Loss = 6.2725e-02, PNorm = 66.8338, GNorm = 0.6405, lr_0 = 2.7722e-04
Loss = 5.7097e-02, PNorm = 66.8371, GNorm = 0.7075, lr_0 = 2.7703e-04
Loss = 6.0463e-02, PNorm = 66.8436, GNorm = 0.6812, lr_0 = 2.7684e-04
Loss = 6.1883e-02, PNorm = 66.8502, GNorm = 0.6494, lr_0 = 2.7665e-04
Loss = 6.0438e-02, PNorm = 66.8570, GNorm = 0.4893, lr_0 = 2.7646e-04
Loss = 6.4818e-02, PNorm = 66.8629, GNorm = 0.7938, lr_0 = 2.7627e-04
Loss = 5.8675e-02, PNorm = 66.8686, GNorm = 0.6175, lr_0 = 2.7608e-04
Loss = 6.7843e-02, PNorm = 66.8727, GNorm = 0.4171, lr_0 = 2.7590e-04
Loss = 5.6141e-02, PNorm = 66.8750, GNorm = 0.8125, lr_0 = 2.7571e-04
Loss = 5.9614e-02, PNorm = 66.8781, GNorm = 0.5794, lr_0 = 2.7552e-04
Loss = 5.3995e-02, PNorm = 66.8828, GNorm = 0.4539, lr_0 = 2.7533e-04
Loss = 5.5961e-02, PNorm = 66.8880, GNorm = 0.5390, lr_0 = 2.7514e-04
Loss = 6.0598e-02, PNorm = 66.8930, GNorm = 0.5441, lr_0 = 2.7495e-04
Loss = 4.8430e-02, PNorm = 66.8966, GNorm = 0.6432, lr_0 = 2.7476e-04
Loss = 5.4008e-02, PNorm = 66.9008, GNorm = 0.6268, lr_0 = 2.7457e-04
Loss = 6.2851e-02, PNorm = 66.9059, GNorm = 0.7538, lr_0 = 2.7439e-04
Loss = 5.8725e-02, PNorm = 66.9123, GNorm = 0.6665, lr_0 = 2.7420e-04
Loss = 5.0885e-02, PNorm = 66.9187, GNorm = 0.4956, lr_0 = 2.7401e-04
Loss = 6.4320e-02, PNorm = 66.9242, GNorm = 0.7844, lr_0 = 2.7382e-04
Loss = 6.2886e-02, PNorm = 66.9314, GNorm = 0.8637, lr_0 = 2.7364e-04
Loss = 5.5725e-02, PNorm = 66.9361, GNorm = 0.6513, lr_0 = 2.7345e-04
Loss = 6.4883e-02, PNorm = 66.9399, GNorm = 0.5263, lr_0 = 2.7326e-04
Loss = 5.9380e-02, PNorm = 66.9442, GNorm = 0.8630, lr_0 = 2.7307e-04
Loss = 5.4177e-02, PNorm = 66.9499, GNorm = 0.8960, lr_0 = 2.7289e-04
Loss = 5.6039e-02, PNorm = 66.9528, GNorm = 0.5687, lr_0 = 2.7270e-04
Loss = 5.8708e-02, PNorm = 66.9579, GNorm = 0.7325, lr_0 = 2.7251e-04
Loss = 5.5102e-02, PNorm = 66.9631, GNorm = 0.9673, lr_0 = 2.7233e-04
Loss = 5.4100e-02, PNorm = 66.9675, GNorm = 0.4532, lr_0 = 2.7214e-04
Loss = 4.9531e-02, PNorm = 66.9735, GNorm = 0.5145, lr_0 = 2.7195e-04
Loss = 6.2535e-02, PNorm = 66.9789, GNorm = 0.5055, lr_0 = 2.7177e-04
Loss = 6.2410e-02, PNorm = 66.9833, GNorm = 0.7382, lr_0 = 2.7158e-04
Loss = 5.6773e-02, PNorm = 66.9867, GNorm = 0.4657, lr_0 = 2.7139e-04
Loss = 6.1552e-02, PNorm = 66.9896, GNorm = 0.5001, lr_0 = 2.7121e-04
Loss = 6.2543e-02, PNorm = 66.9919, GNorm = 1.1495, lr_0 = 2.7102e-04
Loss = 5.8160e-02, PNorm = 66.9963, GNorm = 0.6927, lr_0 = 2.7084e-04
Loss = 5.9421e-02, PNorm = 67.0002, GNorm = 0.5886, lr_0 = 2.7065e-04
Loss = 6.1784e-02, PNorm = 67.0045, GNorm = 0.5175, lr_0 = 2.7047e-04
Loss = 6.2951e-02, PNorm = 67.0104, GNorm = 0.8871, lr_0 = 2.7028e-04
Loss = 5.9719e-02, PNorm = 67.0189, GNorm = 0.5054, lr_0 = 2.7010e-04
Loss = 5.8196e-02, PNorm = 67.0239, GNorm = 0.5669, lr_0 = 2.6991e-04
Loss = 6.2258e-02, PNorm = 67.0279, GNorm = 0.7923, lr_0 = 2.6973e-04
Loss = 5.7807e-02, PNorm = 67.0335, GNorm = 0.6791, lr_0 = 2.6954e-04
Loss = 6.1257e-02, PNorm = 67.0369, GNorm = 0.6710, lr_0 = 2.6936e-04
Loss = 5.6421e-02, PNorm = 67.0415, GNorm = 0.4110, lr_0 = 2.6917e-04
Loss = 5.3931e-02, PNorm = 67.0474, GNorm = 0.8916, lr_0 = 2.6899e-04
Loss = 6.8861e-02, PNorm = 67.0524, GNorm = 0.4786, lr_0 = 2.6880e-04
Loss = 5.7287e-02, PNorm = 67.0548, GNorm = 0.4132, lr_0 = 2.6862e-04
Loss = 6.2135e-02, PNorm = 67.0574, GNorm = 0.4743, lr_0 = 2.6844e-04
Loss = 6.9286e-02, PNorm = 67.0615, GNorm = 0.8543, lr_0 = 2.6825e-04
Validation mae = 0.390323
Epoch 18
Loss = 5.8268e-02, PNorm = 67.0671, GNorm = 0.6259, lr_0 = 2.6807e-04
Loss = 5.3568e-02, PNorm = 67.0715, GNorm = 0.4757, lr_0 = 2.6788e-04
Loss = 4.3101e-02, PNorm = 67.0759, GNorm = 0.4225, lr_0 = 2.6770e-04
Loss = 5.7289e-02, PNorm = 67.0805, GNorm = 0.8528, lr_0 = 2.6752e-04
Loss = 4.2372e-02, PNorm = 67.0862, GNorm = 0.5495, lr_0 = 2.6733e-04
Loss = 5.4041e-02, PNorm = 67.0943, GNorm = 0.6140, lr_0 = 2.6715e-04
Loss = 5.6501e-02, PNorm = 67.1017, GNorm = 0.7631, lr_0 = 2.6697e-04
Loss = 5.0688e-02, PNorm = 67.1076, GNorm = 0.4655, lr_0 = 2.6678e-04
Loss = 5.1715e-02, PNorm = 67.1142, GNorm = 0.4624, lr_0 = 2.6660e-04
Loss = 5.7302e-02, PNorm = 67.1213, GNorm = 0.6060, lr_0 = 2.6642e-04
Loss = 5.0627e-02, PNorm = 67.1265, GNorm = 0.4646, lr_0 = 2.6624e-04
Loss = 5.2670e-02, PNorm = 67.1333, GNorm = 0.4916, lr_0 = 2.6605e-04
Loss = 4.9546e-02, PNorm = 67.1387, GNorm = 0.6565, lr_0 = 2.6587e-04
Loss = 5.3772e-02, PNorm = 67.1428, GNorm = 0.5658, lr_0 = 2.6569e-04
Loss = 4.8782e-02, PNorm = 67.1443, GNorm = 0.5433, lr_0 = 2.6551e-04
Loss = 5.7381e-02, PNorm = 67.1502, GNorm = 0.5091, lr_0 = 2.6533e-04
Loss = 4.5941e-02, PNorm = 67.1530, GNorm = 0.5920, lr_0 = 2.6514e-04
Loss = 4.7286e-02, PNorm = 67.1567, GNorm = 0.7753, lr_0 = 2.6496e-04
Loss = 5.6075e-02, PNorm = 67.1624, GNorm = 0.4562, lr_0 = 2.6478e-04
Loss = 5.5100e-02, PNorm = 67.1689, GNorm = 0.3488, lr_0 = 2.6460e-04
Loss = 5.5718e-02, PNorm = 67.1746, GNorm = 0.5782, lr_0 = 2.6442e-04
Loss = 5.2177e-02, PNorm = 67.1785, GNorm = 0.6730, lr_0 = 2.6424e-04
Loss = 5.9263e-02, PNorm = 67.1838, GNorm = 0.6260, lr_0 = 2.6406e-04
Loss = 4.9924e-02, PNorm = 67.1910, GNorm = 0.5356, lr_0 = 2.6388e-04
Loss = 5.1777e-02, PNorm = 67.1961, GNorm = 0.4665, lr_0 = 2.6369e-04
Loss = 5.1917e-02, PNorm = 67.2001, GNorm = 0.3637, lr_0 = 2.6351e-04
Loss = 4.5599e-02, PNorm = 67.2045, GNorm = 0.5265, lr_0 = 2.6333e-04
Loss = 5.4807e-02, PNorm = 67.2098, GNorm = 0.6061, lr_0 = 2.6315e-04
Loss = 5.2953e-02, PNorm = 67.2155, GNorm = 0.4066, lr_0 = 2.6297e-04
Loss = 4.8676e-02, PNorm = 67.2205, GNorm = 0.4584, lr_0 = 2.6279e-04
Loss = 4.7807e-02, PNorm = 67.2267, GNorm = 0.4559, lr_0 = 2.6261e-04
Loss = 5.5288e-02, PNorm = 67.2317, GNorm = 0.7446, lr_0 = 2.6243e-04
Loss = 4.7602e-02, PNorm = 67.2359, GNorm = 0.6395, lr_0 = 2.6225e-04
Loss = 5.4174e-02, PNorm = 67.2376, GNorm = 0.4844, lr_0 = 2.6207e-04
Loss = 5.5759e-02, PNorm = 67.2421, GNorm = 0.4258, lr_0 = 2.6189e-04
Loss = 6.1204e-02, PNorm = 67.2474, GNorm = 1.2562, lr_0 = 2.6171e-04
Loss = 6.3873e-02, PNorm = 67.2527, GNorm = 0.4230, lr_0 = 2.6153e-04
Loss = 5.4681e-02, PNorm = 67.2578, GNorm = 0.5149, lr_0 = 2.6136e-04
Loss = 5.8304e-02, PNorm = 67.2606, GNorm = 0.5036, lr_0 = 2.6118e-04
Loss = 5.8655e-02, PNorm = 67.2666, GNorm = 0.5375, lr_0 = 2.6100e-04
Loss = 4.7391e-02, PNorm = 67.2693, GNorm = 0.5002, lr_0 = 2.6082e-04
Loss = 4.9789e-02, PNorm = 67.2720, GNorm = 0.4282, lr_0 = 2.6064e-04
Loss = 4.5333e-02, PNorm = 67.2765, GNorm = 0.4027, lr_0 = 2.6046e-04
Loss = 4.7797e-02, PNorm = 67.2813, GNorm = 0.3969, lr_0 = 2.6028e-04
Loss = 4.7566e-02, PNorm = 67.2861, GNorm = 0.5034, lr_0 = 2.6011e-04
Loss = 5.5062e-02, PNorm = 67.2891, GNorm = 0.4043, lr_0 = 2.5993e-04
Loss = 6.2554e-02, PNorm = 67.2924, GNorm = 0.5719, lr_0 = 2.5975e-04
Loss = 5.0239e-02, PNorm = 67.2978, GNorm = 0.4709, lr_0 = 2.5957e-04
Loss = 5.0130e-02, PNorm = 67.3010, GNorm = 0.7172, lr_0 = 2.5939e-04
Loss = 5.6083e-02, PNorm = 67.3059, GNorm = 0.8316, lr_0 = 2.5922e-04
Loss = 4.8523e-02, PNorm = 67.3101, GNorm = 0.5243, lr_0 = 2.5904e-04
Loss = 5.0241e-02, PNorm = 67.3145, GNorm = 0.7501, lr_0 = 2.5886e-04
Loss = 5.7767e-02, PNorm = 67.3174, GNorm = 0.6726, lr_0 = 2.5868e-04
Loss = 5.2934e-02, PNorm = 67.3222, GNorm = 0.6436, lr_0 = 2.5851e-04
Loss = 5.4432e-02, PNorm = 67.3268, GNorm = 0.5716, lr_0 = 2.5833e-04
Loss = 5.5308e-02, PNorm = 67.3329, GNorm = 0.5903, lr_0 = 2.5815e-04
Loss = 5.9954e-02, PNorm = 67.3383, GNorm = 0.4499, lr_0 = 2.5797e-04
Loss = 5.1494e-02, PNorm = 67.3424, GNorm = 0.5550, lr_0 = 2.5780e-04
Loss = 4.9279e-02, PNorm = 67.3479, GNorm = 0.5453, lr_0 = 2.5762e-04
Loss = 5.1337e-02, PNorm = 67.3537, GNorm = 0.5993, lr_0 = 2.5745e-04
Loss = 6.0987e-02, PNorm = 67.3601, GNorm = 0.6139, lr_0 = 2.5727e-04
Loss = 4.9938e-02, PNorm = 67.3665, GNorm = 0.7182, lr_0 = 2.5709e-04
Loss = 5.0807e-02, PNorm = 67.3706, GNorm = 0.7576, lr_0 = 2.5692e-04
Loss = 5.0910e-02, PNorm = 67.3746, GNorm = 0.7893, lr_0 = 2.5674e-04
Loss = 5.3024e-02, PNorm = 67.3791, GNorm = 0.7088, lr_0 = 2.5656e-04
Loss = 5.3862e-02, PNorm = 67.3845, GNorm = 0.4057, lr_0 = 2.5639e-04
Loss = 4.4949e-02, PNorm = 67.3892, GNorm = 0.4655, lr_0 = 2.5621e-04
Loss = 5.6927e-02, PNorm = 67.3934, GNorm = 0.6096, lr_0 = 2.5604e-04
Loss = 4.6735e-02, PNorm = 67.3991, GNorm = 0.5526, lr_0 = 2.5586e-04
Loss = 5.8144e-02, PNorm = 67.4049, GNorm = 0.6630, lr_0 = 2.5569e-04
Loss = 4.9434e-02, PNorm = 67.4110, GNorm = 0.4267, lr_0 = 2.5551e-04
Loss = 5.2075e-02, PNorm = 67.4149, GNorm = 0.4741, lr_0 = 2.5534e-04
Loss = 5.5606e-02, PNorm = 67.4206, GNorm = 0.6219, lr_0 = 2.5516e-04
Loss = 5.7431e-02, PNorm = 67.4258, GNorm = 0.7774, lr_0 = 2.5499e-04
Loss = 5.8527e-02, PNorm = 67.4314, GNorm = 0.5941, lr_0 = 2.5481e-04
Loss = 5.7613e-02, PNorm = 67.4358, GNorm = 0.4675, lr_0 = 2.5464e-04
Loss = 5.4216e-02, PNorm = 67.4364, GNorm = 0.6597, lr_0 = 2.5446e-04
Loss = 5.1988e-02, PNorm = 67.4400, GNorm = 0.7339, lr_0 = 2.5429e-04
Loss = 5.2401e-02, PNorm = 67.4444, GNorm = 0.6854, lr_0 = 2.5411e-04
Loss = 5.9033e-02, PNorm = 67.4498, GNorm = 0.5805, lr_0 = 2.5394e-04
Loss = 4.5557e-02, PNorm = 67.4530, GNorm = 0.6961, lr_0 = 2.5377e-04
Loss = 5.4481e-02, PNorm = 67.4547, GNorm = 0.5748, lr_0 = 2.5359e-04
Loss = 5.2469e-02, PNorm = 67.4612, GNorm = 0.5884, lr_0 = 2.5342e-04
Loss = 5.0896e-02, PNorm = 67.4652, GNorm = 0.4041, lr_0 = 2.5325e-04
Loss = 5.3172e-02, PNorm = 67.4684, GNorm = 0.4501, lr_0 = 2.5307e-04
Loss = 5.7947e-02, PNorm = 67.4701, GNorm = 0.6067, lr_0 = 2.5290e-04
Loss = 5.8823e-02, PNorm = 67.4743, GNorm = 0.8416, lr_0 = 2.5273e-04
Loss = 5.6165e-02, PNorm = 67.4780, GNorm = 0.5619, lr_0 = 2.5255e-04
Loss = 4.6722e-02, PNorm = 67.4831, GNorm = 0.6367, lr_0 = 2.5238e-04
Loss = 5.6811e-02, PNorm = 67.4866, GNorm = 0.6848, lr_0 = 2.5221e-04
Loss = 5.1633e-02, PNorm = 67.4912, GNorm = 0.5064, lr_0 = 2.5203e-04
Loss = 5.1540e-02, PNorm = 67.4955, GNorm = 0.7056, lr_0 = 2.5186e-04
Loss = 6.2357e-02, PNorm = 67.5003, GNorm = 0.6304, lr_0 = 2.5169e-04
Loss = 6.5759e-02, PNorm = 67.5050, GNorm = 0.7066, lr_0 = 2.5152e-04
Loss = 6.5118e-02, PNorm = 67.5108, GNorm = 0.8572, lr_0 = 2.5134e-04
Loss = 4.9481e-02, PNorm = 67.5157, GNorm = 0.6238, lr_0 = 2.5117e-04
Loss = 5.6651e-02, PNorm = 67.5211, GNorm = 0.7370, lr_0 = 2.5100e-04
Loss = 5.9789e-02, PNorm = 67.5233, GNorm = 0.4519, lr_0 = 2.5083e-04
Loss = 5.7258e-02, PNorm = 67.5273, GNorm = 1.0363, lr_0 = 2.5066e-04
Loss = 5.0693e-02, PNorm = 67.5322, GNorm = 0.4067, lr_0 = 2.5048e-04
Loss = 6.1485e-02, PNorm = 67.5362, GNorm = 0.8173, lr_0 = 2.5031e-04
Loss = 6.5475e-02, PNorm = 67.5405, GNorm = 0.7024, lr_0 = 2.5014e-04
Loss = 5.4042e-02, PNorm = 67.5434, GNorm = 0.5695, lr_0 = 2.4997e-04
Loss = 5.3635e-02, PNorm = 67.5482, GNorm = 0.4522, lr_0 = 2.4980e-04
Loss = 5.5370e-02, PNorm = 67.5541, GNorm = 0.6137, lr_0 = 2.4963e-04
Loss = 5.4940e-02, PNorm = 67.5579, GNorm = 0.5798, lr_0 = 2.4946e-04
Loss = 4.7883e-02, PNorm = 67.5609, GNorm = 0.4567, lr_0 = 2.4929e-04
Loss = 5.7154e-02, PNorm = 67.5660, GNorm = 0.5043, lr_0 = 2.4911e-04
Loss = 5.8414e-02, PNorm = 67.5703, GNorm = 0.5778, lr_0 = 2.4894e-04
Loss = 6.7064e-02, PNorm = 67.5732, GNorm = 0.8041, lr_0 = 2.4877e-04
Loss = 6.4767e-02, PNorm = 67.5781, GNorm = 0.6176, lr_0 = 2.4860e-04
Loss = 5.6172e-02, PNorm = 67.5822, GNorm = 0.5969, lr_0 = 2.4843e-04
Loss = 5.3104e-02, PNorm = 67.5849, GNorm = 0.4819, lr_0 = 2.4826e-04
Loss = 5.8824e-02, PNorm = 67.5883, GNorm = 0.5639, lr_0 = 2.4809e-04
Loss = 5.5718e-02, PNorm = 67.5912, GNorm = 0.9216, lr_0 = 2.4792e-04
Loss = 4.5941e-02, PNorm = 67.5961, GNorm = 0.3990, lr_0 = 2.4775e-04
Loss = 4.7038e-02, PNorm = 67.5982, GNorm = 0.9489, lr_0 = 2.4758e-04
Loss = 6.8843e-02, PNorm = 67.5997, GNorm = 0.8618, lr_0 = 2.4741e-04
Loss = 6.4408e-02, PNorm = 67.6014, GNorm = 0.4849, lr_0 = 2.4724e-04
Loss = 4.8490e-02, PNorm = 67.6044, GNorm = 0.6782, lr_0 = 2.4707e-04
Validation mae = 0.384346
Epoch 19
Loss = 5.1888e-02, PNorm = 67.6091, GNorm = 0.4316, lr_0 = 2.4690e-04
Loss = 5.2664e-02, PNorm = 67.6155, GNorm = 0.6435, lr_0 = 2.4674e-04
Loss = 5.0253e-02, PNorm = 67.6191, GNorm = 0.5257, lr_0 = 2.4657e-04
Loss = 4.8404e-02, PNorm = 67.6237, GNorm = 0.5185, lr_0 = 2.4640e-04
Loss = 4.7695e-02, PNorm = 67.6282, GNorm = 0.5476, lr_0 = 2.4623e-04
Loss = 4.1003e-02, PNorm = 67.6325, GNorm = 0.4684, lr_0 = 2.4606e-04
Loss = 5.6211e-02, PNorm = 67.6364, GNorm = 0.5267, lr_0 = 2.4589e-04
Loss = 5.3307e-02, PNorm = 67.6423, GNorm = 0.6767, lr_0 = 2.4572e-04
Loss = 5.8639e-02, PNorm = 67.6488, GNorm = 0.4781, lr_0 = 2.4556e-04
Loss = 4.9886e-02, PNorm = 67.6539, GNorm = 0.5487, lr_0 = 2.4539e-04
Loss = 4.7083e-02, PNorm = 67.6575, GNorm = 0.4803, lr_0 = 2.4522e-04
Loss = 5.9794e-02, PNorm = 67.6617, GNorm = 0.4867, lr_0 = 2.4505e-04
Loss = 4.6486e-02, PNorm = 67.6657, GNorm = 0.4604, lr_0 = 2.4488e-04
Loss = 4.9807e-02, PNorm = 67.6698, GNorm = 0.4917, lr_0 = 2.4472e-04
Loss = 5.8445e-02, PNorm = 67.6737, GNorm = 0.5713, lr_0 = 2.4455e-04
Loss = 4.4835e-02, PNorm = 67.6778, GNorm = 0.5051, lr_0 = 2.4438e-04
Loss = 5.0485e-02, PNorm = 67.6820, GNorm = 0.8120, lr_0 = 2.4421e-04
Loss = 4.2663e-02, PNorm = 67.6862, GNorm = 0.5069, lr_0 = 2.4405e-04
Loss = 4.2810e-02, PNorm = 67.6899, GNorm = 0.4974, lr_0 = 2.4388e-04
Loss = 4.5150e-02, PNorm = 67.6939, GNorm = 0.7477, lr_0 = 2.4371e-04
Loss = 4.3803e-02, PNorm = 67.6972, GNorm = 0.4182, lr_0 = 2.4354e-04
Loss = 4.7233e-02, PNorm = 67.7022, GNorm = 0.4922, lr_0 = 2.4338e-04
Loss = 4.9899e-02, PNorm = 67.7059, GNorm = 0.4434, lr_0 = 2.4321e-04
Loss = 4.4432e-02, PNorm = 67.7094, GNorm = 0.6054, lr_0 = 2.4304e-04
Loss = 4.8037e-02, PNorm = 67.7121, GNorm = 0.4183, lr_0 = 2.4288e-04
Loss = 4.6226e-02, PNorm = 67.7161, GNorm = 0.4652, lr_0 = 2.4271e-04
Loss = 4.0319e-02, PNorm = 67.7197, GNorm = 0.5429, lr_0 = 2.4254e-04
Loss = 5.1202e-02, PNorm = 67.7231, GNorm = 0.8194, lr_0 = 2.4238e-04
Loss = 4.9070e-02, PNorm = 67.7258, GNorm = 0.4794, lr_0 = 2.4221e-04
Loss = 5.2710e-02, PNorm = 67.7284, GNorm = 0.6889, lr_0 = 2.4205e-04
Loss = 5.9181e-02, PNorm = 67.7328, GNorm = 0.4777, lr_0 = 2.4188e-04
Loss = 5.5681e-02, PNorm = 67.7383, GNorm = 0.3804, lr_0 = 2.4171e-04
Loss = 4.1845e-02, PNorm = 67.7442, GNorm = 0.3646, lr_0 = 2.4155e-04
Loss = 5.2136e-02, PNorm = 67.7497, GNorm = 0.8374, lr_0 = 2.4138e-04
Loss = 4.6833e-02, PNorm = 67.7549, GNorm = 0.6292, lr_0 = 2.4122e-04
Loss = 5.1717e-02, PNorm = 67.7625, GNorm = 0.6250, lr_0 = 2.4105e-04
Loss = 5.0794e-02, PNorm = 67.7683, GNorm = 0.6132, lr_0 = 2.4089e-04
Loss = 5.8847e-02, PNorm = 67.7706, GNorm = 0.4695, lr_0 = 2.4072e-04
Loss = 5.1469e-02, PNorm = 67.7745, GNorm = 0.4367, lr_0 = 2.4056e-04
Loss = 4.2271e-02, PNorm = 67.7787, GNorm = 0.5339, lr_0 = 2.4039e-04
Loss = 4.3400e-02, PNorm = 67.7820, GNorm = 0.4488, lr_0 = 2.4023e-04
Loss = 4.6782e-02, PNorm = 67.7849, GNorm = 0.3441, lr_0 = 2.4006e-04
Loss = 4.7462e-02, PNorm = 67.7876, GNorm = 0.7004, lr_0 = 2.3990e-04
Loss = 5.2146e-02, PNorm = 67.7922, GNorm = 0.5415, lr_0 = 2.3974e-04
Loss = 4.5779e-02, PNorm = 67.7976, GNorm = 0.4508, lr_0 = 2.3957e-04
Loss = 5.3092e-02, PNorm = 67.7994, GNorm = 0.5448, lr_0 = 2.3941e-04
Loss = 4.9262e-02, PNorm = 67.8034, GNorm = 0.5793, lr_0 = 2.3924e-04
Loss = 5.0131e-02, PNorm = 67.8081, GNorm = 0.5932, lr_0 = 2.3908e-04
Loss = 5.2529e-02, PNorm = 67.8166, GNorm = 0.4532, lr_0 = 2.3892e-04
Loss = 4.9896e-02, PNorm = 67.8222, GNorm = 0.6374, lr_0 = 2.3875e-04
Loss = 5.3435e-02, PNorm = 67.8255, GNorm = 0.5831, lr_0 = 2.3859e-04
Loss = 5.0492e-02, PNorm = 67.8304, GNorm = 0.8934, lr_0 = 2.3842e-04
Loss = 5.4301e-02, PNorm = 67.8355, GNorm = 0.5817, lr_0 = 2.3826e-04
Loss = 4.6002e-02, PNorm = 67.8388, GNorm = 0.5091, lr_0 = 2.3810e-04
Loss = 5.5080e-02, PNorm = 67.8425, GNorm = 0.5009, lr_0 = 2.3794e-04
Loss = 4.6801e-02, PNorm = 67.8493, GNorm = 0.4786, lr_0 = 2.3777e-04
Loss = 5.3880e-02, PNorm = 67.8550, GNorm = 0.4793, lr_0 = 2.3761e-04
Loss = 4.8535e-02, PNorm = 67.8575, GNorm = 0.4867, lr_0 = 2.3745e-04
Loss = 5.1049e-02, PNorm = 67.8602, GNorm = 0.5958, lr_0 = 2.3728e-04
Loss = 6.0474e-02, PNorm = 67.8654, GNorm = 0.6404, lr_0 = 2.3712e-04
Loss = 4.7239e-02, PNorm = 67.8705, GNorm = 0.4690, lr_0 = 2.3696e-04
Loss = 4.6806e-02, PNorm = 67.8748, GNorm = 0.6885, lr_0 = 2.3680e-04
Loss = 5.7285e-02, PNorm = 67.8775, GNorm = 0.6121, lr_0 = 2.3663e-04
Loss = 4.4803e-02, PNorm = 67.8819, GNorm = 0.4812, lr_0 = 2.3647e-04
Loss = 5.2033e-02, PNorm = 67.8865, GNorm = 0.7323, lr_0 = 2.3631e-04
Loss = 4.7729e-02, PNorm = 67.8912, GNorm = 0.4940, lr_0 = 2.3615e-04
Loss = 4.3206e-02, PNorm = 67.8950, GNorm = 0.6250, lr_0 = 2.3599e-04
Loss = 4.9363e-02, PNorm = 67.8986, GNorm = 0.9701, lr_0 = 2.3582e-04
Loss = 5.4326e-02, PNorm = 67.9032, GNorm = 0.6904, lr_0 = 2.3566e-04
Loss = 4.4209e-02, PNorm = 67.9059, GNorm = 0.5839, lr_0 = 2.3550e-04
Loss = 4.8206e-02, PNorm = 67.9115, GNorm = 0.4120, lr_0 = 2.3534e-04
Loss = 4.8063e-02, PNorm = 67.9154, GNorm = 0.5085, lr_0 = 2.3518e-04
Loss = 4.4036e-02, PNorm = 67.9197, GNorm = 0.4409, lr_0 = 2.3502e-04
Loss = 5.4093e-02, PNorm = 67.9241, GNorm = 0.5746, lr_0 = 2.3486e-04
Loss = 4.7206e-02, PNorm = 67.9282, GNorm = 0.5792, lr_0 = 2.3470e-04
Loss = 4.8855e-02, PNorm = 67.9333, GNorm = 0.6392, lr_0 = 2.3454e-04
Loss = 5.1928e-02, PNorm = 67.9374, GNorm = 0.4718, lr_0 = 2.3437e-04
Loss = 4.5873e-02, PNorm = 67.9422, GNorm = 0.4719, lr_0 = 2.3421e-04
Loss = 5.7339e-02, PNorm = 67.9455, GNorm = 0.5815, lr_0 = 2.3405e-04
Loss = 4.8679e-02, PNorm = 67.9483, GNorm = 0.6880, lr_0 = 2.3389e-04
Loss = 5.0542e-02, PNorm = 67.9519, GNorm = 0.4265, lr_0 = 2.3373e-04
Loss = 5.1870e-02, PNorm = 67.9569, GNorm = 0.4697, lr_0 = 2.3357e-04
Loss = 4.5448e-02, PNorm = 67.9615, GNorm = 0.4304, lr_0 = 2.3341e-04
Loss = 6.1944e-02, PNorm = 67.9654, GNorm = 0.6380, lr_0 = 2.3325e-04
Loss = 5.5132e-02, PNorm = 67.9681, GNorm = 1.0189, lr_0 = 2.3309e-04
Loss = 5.0637e-02, PNorm = 67.9716, GNorm = 0.9160, lr_0 = 2.3293e-04
Loss = 5.6368e-02, PNorm = 67.9760, GNorm = 0.5885, lr_0 = 2.3277e-04
Loss = 4.4132e-02, PNorm = 67.9790, GNorm = 0.6950, lr_0 = 2.3261e-04
Loss = 6.9139e-02, PNorm = 67.9827, GNorm = 0.8782, lr_0 = 2.3246e-04
Loss = 5.2093e-02, PNorm = 67.9859, GNorm = 0.6552, lr_0 = 2.3230e-04
Loss = 4.9309e-02, PNorm = 67.9908, GNorm = 0.7742, lr_0 = 2.3214e-04
Loss = 5.0625e-02, PNorm = 67.9971, GNorm = 0.6529, lr_0 = 2.3198e-04
Loss = 5.3126e-02, PNorm = 68.0012, GNorm = 0.4702, lr_0 = 2.3182e-04
Loss = 5.6517e-02, PNorm = 68.0045, GNorm = 0.6378, lr_0 = 2.3166e-04
Loss = 4.7321e-02, PNorm = 68.0070, GNorm = 0.4613, lr_0 = 2.3150e-04
Loss = 5.7635e-02, PNorm = 68.0096, GNorm = 1.1548, lr_0 = 2.3134e-04
Loss = 5.2151e-02, PNorm = 68.0131, GNorm = 0.5307, lr_0 = 2.3118e-04
Loss = 4.7053e-02, PNorm = 68.0173, GNorm = 0.6505, lr_0 = 2.3103e-04
Loss = 4.6905e-02, PNorm = 68.0190, GNorm = 0.5627, lr_0 = 2.3087e-04
Loss = 5.7088e-02, PNorm = 68.0210, GNorm = 0.5431, lr_0 = 2.3071e-04
Loss = 5.2983e-02, PNorm = 68.0247, GNorm = 0.6229, lr_0 = 2.3055e-04
Loss = 4.4572e-02, PNorm = 68.0282, GNorm = 0.4372, lr_0 = 2.3039e-04
Loss = 5.1923e-02, PNorm = 68.0324, GNorm = 0.4820, lr_0 = 2.3024e-04
Loss = 4.7054e-02, PNorm = 68.0345, GNorm = 0.4982, lr_0 = 2.3008e-04
Loss = 5.9602e-02, PNorm = 68.0361, GNorm = 0.6046, lr_0 = 2.2992e-04
Loss = 4.7698e-02, PNorm = 68.0400, GNorm = 0.4235, lr_0 = 2.2976e-04
Loss = 5.4812e-02, PNorm = 68.0429, GNorm = 0.4635, lr_0 = 2.2961e-04
Loss = 5.8901e-02, PNorm = 68.0463, GNorm = 0.6980, lr_0 = 2.2945e-04
Loss = 4.6952e-02, PNorm = 68.0490, GNorm = 0.5026, lr_0 = 2.2929e-04
Loss = 5.9714e-02, PNorm = 68.0531, GNorm = 0.3964, lr_0 = 2.2913e-04
Loss = 4.6844e-02, PNorm = 68.0566, GNorm = 0.7029, lr_0 = 2.2898e-04
Loss = 5.1279e-02, PNorm = 68.0577, GNorm = 0.5055, lr_0 = 2.2882e-04
Loss = 5.3836e-02, PNorm = 68.0597, GNorm = 0.5014, lr_0 = 2.2866e-04
Loss = 4.8724e-02, PNorm = 68.0630, GNorm = 0.4735, lr_0 = 2.2851e-04
Loss = 4.7180e-02, PNorm = 68.0651, GNorm = 0.6719, lr_0 = 2.2835e-04
Loss = 5.3110e-02, PNorm = 68.0683, GNorm = 0.4620, lr_0 = 2.2819e-04
Loss = 5.6747e-02, PNorm = 68.0730, GNorm = 0.6441, lr_0 = 2.2804e-04
Loss = 5.5844e-02, PNorm = 68.0767, GNorm = 0.4078, lr_0 = 2.2788e-04
Loss = 5.6280e-02, PNorm = 68.0813, GNorm = 0.6439, lr_0 = 2.2773e-04
Loss = 5.0009e-02, PNorm = 68.0860, GNorm = 0.4620, lr_0 = 2.2757e-04
Validation mae = 0.390643
Epoch 20
Loss = 4.1559e-02, PNorm = 68.0899, GNorm = 0.6581, lr_0 = 2.2741e-04
Loss = 4.5881e-02, PNorm = 68.0938, GNorm = 0.5973, lr_0 = 2.2726e-04
Loss = 4.1343e-02, PNorm = 68.0979, GNorm = 0.5311, lr_0 = 2.2710e-04
Loss = 4.2432e-02, PNorm = 68.1038, GNorm = 0.3347, lr_0 = 2.2695e-04
Loss = 4.4990e-02, PNorm = 68.1081, GNorm = 0.5000, lr_0 = 2.2679e-04
Loss = 4.2445e-02, PNorm = 68.1120, GNorm = 0.5525, lr_0 = 2.2664e-04
Loss = 4.3553e-02, PNorm = 68.1145, GNorm = 0.6736, lr_0 = 2.2648e-04
Loss = 4.3257e-02, PNorm = 68.1177, GNorm = 0.5187, lr_0 = 2.2632e-04
Loss = 5.0161e-02, PNorm = 68.1208, GNorm = 0.4028, lr_0 = 2.2617e-04
Loss = 3.9045e-02, PNorm = 68.1241, GNorm = 0.4614, lr_0 = 2.2601e-04
Loss = 4.4797e-02, PNorm = 68.1287, GNorm = 0.5904, lr_0 = 2.2586e-04
Loss = 5.0440e-02, PNorm = 68.1349, GNorm = 0.4399, lr_0 = 2.2571e-04
Loss = 4.1948e-02, PNorm = 68.1396, GNorm = 0.3740, lr_0 = 2.2555e-04
Loss = 4.9275e-02, PNorm = 68.1432, GNorm = 0.5129, lr_0 = 2.2540e-04
Loss = 4.5869e-02, PNorm = 68.1487, GNorm = 0.9874, lr_0 = 2.2524e-04
Loss = 4.3033e-02, PNorm = 68.1531, GNorm = 0.6725, lr_0 = 2.2509e-04
Loss = 4.1062e-02, PNorm = 68.1553, GNorm = 0.5193, lr_0 = 2.2493e-04
Loss = 4.8188e-02, PNorm = 68.1577, GNorm = 0.6549, lr_0 = 2.2478e-04
Loss = 5.3871e-02, PNorm = 68.1620, GNorm = 0.6133, lr_0 = 2.2463e-04
Loss = 4.7319e-02, PNorm = 68.1651, GNorm = 0.5173, lr_0 = 2.2447e-04
Loss = 4.5659e-02, PNorm = 68.1691, GNorm = 0.3779, lr_0 = 2.2432e-04
Loss = 4.8330e-02, PNorm = 68.1735, GNorm = 0.4839, lr_0 = 2.2416e-04
Loss = 4.5869e-02, PNorm = 68.1794, GNorm = 0.6000, lr_0 = 2.2401e-04
Loss = 4.0333e-02, PNorm = 68.1839, GNorm = 0.3782, lr_0 = 2.2386e-04
Loss = 3.8966e-02, PNorm = 68.1886, GNorm = 0.4613, lr_0 = 2.2370e-04
Loss = 4.2983e-02, PNorm = 68.1912, GNorm = 0.4366, lr_0 = 2.2355e-04
Loss = 4.8960e-02, PNorm = 68.1939, GNorm = 0.5254, lr_0 = 2.2340e-04
Loss = 4.6560e-02, PNorm = 68.1969, GNorm = 0.5230, lr_0 = 2.2324e-04
Loss = 4.8174e-02, PNorm = 68.2013, GNorm = 0.6276, lr_0 = 2.2309e-04
Loss = 4.7934e-02, PNorm = 68.2048, GNorm = 0.4088, lr_0 = 2.2294e-04
Loss = 4.9634e-02, PNorm = 68.2085, GNorm = 0.8185, lr_0 = 2.2279e-04
Loss = 4.9816e-02, PNorm = 68.2132, GNorm = 0.8903, lr_0 = 2.2263e-04
Loss = 4.3974e-02, PNorm = 68.2174, GNorm = 0.4558, lr_0 = 2.2248e-04
Loss = 4.4289e-02, PNorm = 68.2216, GNorm = 0.4324, lr_0 = 2.2233e-04
Loss = 4.1161e-02, PNorm = 68.2258, GNorm = 0.4879, lr_0 = 2.2218e-04
Loss = 4.5463e-02, PNorm = 68.2302, GNorm = 0.4375, lr_0 = 2.2202e-04
Loss = 3.8388e-02, PNorm = 68.2321, GNorm = 0.7677, lr_0 = 2.2187e-04
Loss = 4.8233e-02, PNorm = 68.2343, GNorm = 0.4282, lr_0 = 2.2172e-04
Loss = 4.6704e-02, PNorm = 68.2386, GNorm = 0.4371, lr_0 = 2.2157e-04
Loss = 4.3623e-02, PNorm = 68.2436, GNorm = 0.6775, lr_0 = 2.2142e-04
Loss = 4.7470e-02, PNorm = 68.2464, GNorm = 0.4411, lr_0 = 2.2126e-04
Loss = 4.9353e-02, PNorm = 68.2497, GNorm = 0.4356, lr_0 = 2.2111e-04
Loss = 4.1698e-02, PNorm = 68.2523, GNorm = 0.5286, lr_0 = 2.2096e-04
Loss = 4.8982e-02, PNorm = 68.2559, GNorm = 0.6643, lr_0 = 2.2081e-04
Loss = 4.9153e-02, PNorm = 68.2593, GNorm = 0.7679, lr_0 = 2.2066e-04
Loss = 4.9644e-02, PNorm = 68.2631, GNorm = 0.5279, lr_0 = 2.2051e-04
Loss = 4.5222e-02, PNorm = 68.2679, GNorm = 0.4882, lr_0 = 2.2036e-04
Loss = 4.0927e-02, PNorm = 68.2704, GNorm = 0.4030, lr_0 = 2.2021e-04
Loss = 4.7576e-02, PNorm = 68.2729, GNorm = 0.5835, lr_0 = 2.2005e-04
Loss = 4.7423e-02, PNorm = 68.2780, GNorm = 0.6067, lr_0 = 2.1990e-04
Loss = 5.3021e-02, PNorm = 68.2832, GNorm = 0.6011, lr_0 = 2.1975e-04
Loss = 4.7091e-02, PNorm = 68.2887, GNorm = 0.5702, lr_0 = 2.1960e-04
Loss = 5.3144e-02, PNorm = 68.2923, GNorm = 0.5737, lr_0 = 2.1945e-04
Loss = 6.1913e-02, PNorm = 68.2968, GNorm = 0.4665, lr_0 = 2.1930e-04
Loss = 5.4815e-02, PNorm = 68.3042, GNorm = 0.5413, lr_0 = 2.1915e-04
Loss = 4.7535e-02, PNorm = 68.3102, GNorm = 0.5984, lr_0 = 2.1900e-04
Loss = 5.0757e-02, PNorm = 68.3141, GNorm = 0.4573, lr_0 = 2.1885e-04
Loss = 5.0006e-02, PNorm = 68.3158, GNorm = 0.9601, lr_0 = 2.1870e-04
Loss = 5.0197e-02, PNorm = 68.3173, GNorm = 0.5284, lr_0 = 2.1855e-04
Loss = 5.6434e-02, PNorm = 68.3206, GNorm = 0.3793, lr_0 = 2.1840e-04
Loss = 4.8470e-02, PNorm = 68.3242, GNorm = 0.7370, lr_0 = 2.1825e-04
Loss = 4.5373e-02, PNorm = 68.3284, GNorm = 0.4499, lr_0 = 2.1810e-04
Loss = 4.1180e-02, PNorm = 68.3325, GNorm = 0.4090, lr_0 = 2.1795e-04
Loss = 5.2557e-02, PNorm = 68.3353, GNorm = 0.8451, lr_0 = 2.1780e-04
Loss = 5.4035e-02, PNorm = 68.3398, GNorm = 0.6935, lr_0 = 2.1765e-04
Loss = 4.3200e-02, PNorm = 68.3438, GNorm = 0.4341, lr_0 = 2.1751e-04
Loss = 6.1064e-02, PNorm = 68.3467, GNorm = 0.4850, lr_0 = 2.1736e-04
Loss = 5.0842e-02, PNorm = 68.3500, GNorm = 0.4351, lr_0 = 2.1721e-04
Loss = 4.3747e-02, PNorm = 68.3530, GNorm = 0.8920, lr_0 = 2.1706e-04
Loss = 4.5302e-02, PNorm = 68.3560, GNorm = 0.5740, lr_0 = 2.1691e-04
Loss = 4.8768e-02, PNorm = 68.3597, GNorm = 0.6252, lr_0 = 2.1676e-04
Loss = 4.8972e-02, PNorm = 68.3629, GNorm = 0.3904, lr_0 = 2.1661e-04
Loss = 4.9093e-02, PNorm = 68.3690, GNorm = 0.5704, lr_0 = 2.1646e-04
Loss = 4.5410e-02, PNorm = 68.3748, GNorm = 0.5087, lr_0 = 2.1632e-04
Loss = 4.9460e-02, PNorm = 68.3784, GNorm = 0.6722, lr_0 = 2.1617e-04
Loss = 4.2275e-02, PNorm = 68.3803, GNorm = 0.4197, lr_0 = 2.1602e-04
Loss = 4.4968e-02, PNorm = 68.3839, GNorm = 0.5550, lr_0 = 2.1587e-04
Loss = 4.6463e-02, PNorm = 68.3877, GNorm = 0.5054, lr_0 = 2.1572e-04
Loss = 4.9153e-02, PNorm = 68.3907, GNorm = 0.5048, lr_0 = 2.1558e-04
Loss = 5.2678e-02, PNorm = 68.3928, GNorm = 0.4674, lr_0 = 2.1543e-04
Loss = 4.3429e-02, PNorm = 68.3961, GNorm = 0.7525, lr_0 = 2.1528e-04
Loss = 5.0816e-02, PNorm = 68.3996, GNorm = 0.7172, lr_0 = 2.1513e-04
Loss = 4.8728e-02, PNorm = 68.4015, GNorm = 0.5958, lr_0 = 2.1499e-04
Loss = 4.9886e-02, PNorm = 68.4039, GNorm = 0.4979, lr_0 = 2.1484e-04
Loss = 5.2157e-02, PNorm = 68.4058, GNorm = 0.5832, lr_0 = 2.1469e-04
Loss = 5.2857e-02, PNorm = 68.4089, GNorm = 0.8110, lr_0 = 2.1454e-04
Loss = 4.3507e-02, PNorm = 68.4146, GNorm = 0.4354, lr_0 = 2.1440e-04
Loss = 4.7382e-02, PNorm = 68.4199, GNorm = 0.5455, lr_0 = 2.1425e-04
Loss = 5.6439e-02, PNorm = 68.4232, GNorm = 0.4257, lr_0 = 2.1410e-04
Loss = 4.5975e-02, PNorm = 68.4258, GNorm = 0.5165, lr_0 = 2.1396e-04
Loss = 4.9205e-02, PNorm = 68.4291, GNorm = 0.5451, lr_0 = 2.1381e-04
Loss = 4.3464e-02, PNorm = 68.4332, GNorm = 0.6251, lr_0 = 2.1366e-04
Loss = 4.8515e-02, PNorm = 68.4379, GNorm = 0.6255, lr_0 = 2.1352e-04
Loss = 4.6271e-02, PNorm = 68.4417, GNorm = 0.5211, lr_0 = 2.1337e-04
Loss = 4.3413e-02, PNorm = 68.4456, GNorm = 0.5470, lr_0 = 2.1323e-04
Loss = 5.5932e-02, PNorm = 68.4513, GNorm = 0.3800, lr_0 = 2.1308e-04
Loss = 4.8307e-02, PNorm = 68.4552, GNorm = 0.4357, lr_0 = 2.1293e-04
Loss = 6.0798e-02, PNorm = 68.4608, GNorm = 0.6361, lr_0 = 2.1279e-04
Loss = 5.7899e-02, PNorm = 68.4635, GNorm = 0.8214, lr_0 = 2.1264e-04
Loss = 4.7168e-02, PNorm = 68.4655, GNorm = 0.4520, lr_0 = 2.1250e-04
Loss = 4.2664e-02, PNorm = 68.4684, GNorm = 0.7407, lr_0 = 2.1235e-04
Loss = 5.4041e-02, PNorm = 68.4728, GNorm = 0.6463, lr_0 = 2.1221e-04
Loss = 4.7402e-02, PNorm = 68.4762, GNorm = 0.5028, lr_0 = 2.1206e-04
Loss = 4.4931e-02, PNorm = 68.4785, GNorm = 0.6986, lr_0 = 2.1191e-04
Loss = 4.5420e-02, PNorm = 68.4809, GNorm = 0.5080, lr_0 = 2.1177e-04
Loss = 4.7801e-02, PNorm = 68.4829, GNorm = 0.5250, lr_0 = 2.1162e-04
Loss = 3.9025e-02, PNorm = 68.4856, GNorm = 0.4216, lr_0 = 2.1148e-04
Loss = 4.8878e-02, PNorm = 68.4887, GNorm = 0.8619, lr_0 = 2.1133e-04
Loss = 5.4354e-02, PNorm = 68.4915, GNorm = 0.6762, lr_0 = 2.1119e-04
Loss = 5.3852e-02, PNorm = 68.4960, GNorm = 0.6195, lr_0 = 2.1104e-04
Loss = 6.1974e-02, PNorm = 68.5005, GNorm = 0.6272, lr_0 = 2.1090e-04
Loss = 5.0306e-02, PNorm = 68.5012, GNorm = 0.5031, lr_0 = 2.1076e-04
Loss = 4.0934e-02, PNorm = 68.5031, GNorm = 0.4938, lr_0 = 2.1061e-04
Loss = 4.6168e-02, PNorm = 68.5047, GNorm = 0.5399, lr_0 = 2.1047e-04
Loss = 4.6386e-02, PNorm = 68.5077, GNorm = 0.4422, lr_0 = 2.1032e-04
Loss = 4.5708e-02, PNorm = 68.5094, GNorm = 0.5853, lr_0 = 2.1018e-04
Loss = 4.9276e-02, PNorm = 68.5117, GNorm = 0.5287, lr_0 = 2.1003e-04
Loss = 4.6305e-02, PNorm = 68.5130, GNorm = 0.4826, lr_0 = 2.0989e-04
Loss = 5.2223e-02, PNorm = 68.5169, GNorm = 0.5720, lr_0 = 2.0975e-04
Loss = 5.0565e-02, PNorm = 68.5203, GNorm = 0.7374, lr_0 = 2.0960e-04
Validation mae = 0.390819
Epoch 21
Loss = 4.2622e-02, PNorm = 68.5239, GNorm = 0.8155, lr_0 = 2.0946e-04
Loss = 4.1512e-02, PNorm = 68.5279, GNorm = 0.5030, lr_0 = 2.0932e-04
Loss = 3.9782e-02, PNorm = 68.5331, GNorm = 0.6299, lr_0 = 2.0917e-04
Loss = 4.1660e-02, PNorm = 68.5363, GNorm = 1.1156, lr_0 = 2.0903e-04
Loss = 4.2970e-02, PNorm = 68.5405, GNorm = 0.4204, lr_0 = 2.0889e-04
Loss = 4.5937e-02, PNorm = 68.5445, GNorm = 0.5225, lr_0 = 2.0874e-04
Loss = 3.9621e-02, PNorm = 68.5478, GNorm = 0.6264, lr_0 = 2.0860e-04
Loss = 3.8975e-02, PNorm = 68.5497, GNorm = 0.4830, lr_0 = 2.0846e-04
Loss = 4.2258e-02, PNorm = 68.5527, GNorm = 0.6692, lr_0 = 2.0831e-04
Loss = 3.7809e-02, PNorm = 68.5559, GNorm = 0.4310, lr_0 = 2.0817e-04
Loss = 4.4166e-02, PNorm = 68.5594, GNorm = 0.3918, lr_0 = 2.0803e-04
Loss = 4.8111e-02, PNorm = 68.5641, GNorm = 0.7392, lr_0 = 2.0789e-04
Loss = 4.0711e-02, PNorm = 68.5677, GNorm = 0.7155, lr_0 = 2.0774e-04
Loss = 4.6736e-02, PNorm = 68.5699, GNorm = 0.5651, lr_0 = 2.0760e-04
Loss = 4.1589e-02, PNorm = 68.5735, GNorm = 0.5816, lr_0 = 2.0746e-04
Loss = 4.1770e-02, PNorm = 68.5759, GNorm = 0.3076, lr_0 = 2.0732e-04
Loss = 4.2377e-02, PNorm = 68.5789, GNorm = 0.4702, lr_0 = 2.0718e-04
Loss = 4.1914e-02, PNorm = 68.5822, GNorm = 0.4340, lr_0 = 2.0703e-04
Loss = 3.8116e-02, PNorm = 68.5851, GNorm = 0.4808, lr_0 = 2.0689e-04
Loss = 3.9023e-02, PNorm = 68.5873, GNorm = 0.7053, lr_0 = 2.0675e-04
Loss = 3.8177e-02, PNorm = 68.5919, GNorm = 0.4447, lr_0 = 2.0661e-04
Loss = 4.2756e-02, PNorm = 68.5947, GNorm = 0.7256, lr_0 = 2.0647e-04
Loss = 4.4944e-02, PNorm = 68.5984, GNorm = 0.4872, lr_0 = 2.0633e-04
Loss = 4.8672e-02, PNorm = 68.6044, GNorm = 0.5035, lr_0 = 2.0618e-04
Loss = 4.2342e-02, PNorm = 68.6084, GNorm = 0.3845, lr_0 = 2.0604e-04
Loss = 4.3445e-02, PNorm = 68.6115, GNorm = 0.5542, lr_0 = 2.0590e-04
Loss = 4.4842e-02, PNorm = 68.6151, GNorm = 0.8621, lr_0 = 2.0576e-04
Loss = 3.9372e-02, PNorm = 68.6185, GNorm = 0.3725, lr_0 = 2.0562e-04
Loss = 4.0099e-02, PNorm = 68.6215, GNorm = 0.4556, lr_0 = 2.0548e-04
Loss = 3.8973e-02, PNorm = 68.6239, GNorm = 0.7016, lr_0 = 2.0534e-04
Loss = 4.3777e-02, PNorm = 68.6277, GNorm = 0.9355, lr_0 = 2.0520e-04
Loss = 4.7936e-02, PNorm = 68.6304, GNorm = 0.5198, lr_0 = 2.0506e-04
Loss = 4.5703e-02, PNorm = 68.6330, GNorm = 0.4452, lr_0 = 2.0492e-04
Loss = 4.7093e-02, PNorm = 68.6357, GNorm = 0.4525, lr_0 = 2.0478e-04
Loss = 4.3150e-02, PNorm = 68.6380, GNorm = 0.4186, lr_0 = 2.0464e-04
Loss = 4.6578e-02, PNorm = 68.6412, GNorm = 0.5760, lr_0 = 2.0450e-04
Loss = 3.3140e-02, PNorm = 68.6429, GNorm = 0.3016, lr_0 = 2.0436e-04
Loss = 4.5791e-02, PNorm = 68.6462, GNorm = 0.5522, lr_0 = 2.0422e-04
Loss = 4.9684e-02, PNorm = 68.6500, GNorm = 0.5161, lr_0 = 2.0408e-04
Loss = 5.2493e-02, PNorm = 68.6552, GNorm = 0.4636, lr_0 = 2.0394e-04
Loss = 4.8213e-02, PNorm = 68.6605, GNorm = 0.6807, lr_0 = 2.0380e-04
Loss = 4.3458e-02, PNorm = 68.6638, GNorm = 0.5125, lr_0 = 2.0366e-04
Loss = 4.5160e-02, PNorm = 68.6650, GNorm = 0.3268, lr_0 = 2.0352e-04
Loss = 4.8219e-02, PNorm = 68.6684, GNorm = 0.8690, lr_0 = 2.0338e-04
Loss = 4.5925e-02, PNorm = 68.6707, GNorm = 0.6134, lr_0 = 2.0324e-04
Loss = 4.2712e-02, PNorm = 68.6731, GNorm = 0.4760, lr_0 = 2.0310e-04
Loss = 4.3661e-02, PNorm = 68.6770, GNorm = 0.5693, lr_0 = 2.0296e-04
Loss = 4.2332e-02, PNorm = 68.6814, GNorm = 0.3763, lr_0 = 2.0282e-04
Loss = 4.5999e-02, PNorm = 68.6856, GNorm = 0.5101, lr_0 = 2.0268e-04
Loss = 3.9641e-02, PNorm = 68.6886, GNorm = 0.5279, lr_0 = 2.0254e-04
Loss = 4.2467e-02, PNorm = 68.6910, GNorm = 0.5145, lr_0 = 2.0240e-04
Loss = 3.9284e-02, PNorm = 68.6937, GNorm = 0.4782, lr_0 = 2.0227e-04
Loss = 4.1554e-02, PNorm = 68.6964, GNorm = 0.6825, lr_0 = 2.0213e-04
Loss = 3.7816e-02, PNorm = 68.7007, GNorm = 0.5708, lr_0 = 2.0199e-04
Loss = 3.9196e-02, PNorm = 68.7049, GNorm = 0.5768, lr_0 = 2.0185e-04
Loss = 4.6674e-02, PNorm = 68.7074, GNorm = 0.8469, lr_0 = 2.0171e-04
Loss = 5.4507e-02, PNorm = 68.7121, GNorm = 0.5341, lr_0 = 2.0157e-04
Loss = 4.0506e-02, PNorm = 68.7158, GNorm = 0.3851, lr_0 = 2.0144e-04
Loss = 4.5286e-02, PNorm = 68.7192, GNorm = 0.7547, lr_0 = 2.0130e-04
Loss = 5.3933e-02, PNorm = 68.7226, GNorm = 0.6681, lr_0 = 2.0116e-04
Loss = 5.4012e-02, PNorm = 68.7250, GNorm = 0.6320, lr_0 = 2.0102e-04
Loss = 4.4256e-02, PNorm = 68.7286, GNorm = 0.6446, lr_0 = 2.0088e-04
Loss = 4.3927e-02, PNorm = 68.7326, GNorm = 0.4251, lr_0 = 2.0075e-04
Loss = 5.5739e-02, PNorm = 68.7369, GNorm = 0.4190, lr_0 = 2.0061e-04
Loss = 4.7066e-02, PNorm = 68.7400, GNorm = 0.6654, lr_0 = 2.0047e-04
Loss = 4.6534e-02, PNorm = 68.7442, GNorm = 0.5126, lr_0 = 2.0033e-04
Loss = 4.2878e-02, PNorm = 68.7475, GNorm = 0.4844, lr_0 = 2.0020e-04
Loss = 4.5141e-02, PNorm = 68.7503, GNorm = 0.5405, lr_0 = 2.0006e-04
Loss = 4.4479e-02, PNorm = 68.7524, GNorm = 0.6174, lr_0 = 1.9992e-04
Loss = 5.4341e-02, PNorm = 68.7548, GNorm = 0.5694, lr_0 = 1.9979e-04
Loss = 4.3278e-02, PNorm = 68.7588, GNorm = 0.5711, lr_0 = 1.9965e-04
Loss = 4.4676e-02, PNorm = 68.7623, GNorm = 0.4109, lr_0 = 1.9951e-04
Loss = 4.7059e-02, PNorm = 68.7666, GNorm = 0.5148, lr_0 = 1.9938e-04
Loss = 4.6589e-02, PNorm = 68.7690, GNorm = 0.5393, lr_0 = 1.9924e-04
Loss = 4.9450e-02, PNorm = 68.7722, GNorm = 0.4343, lr_0 = 1.9910e-04
Loss = 4.2422e-02, PNorm = 68.7751, GNorm = 0.7741, lr_0 = 1.9897e-04
Loss = 4.4840e-02, PNorm = 68.7783, GNorm = 0.7219, lr_0 = 1.9883e-04
Loss = 4.1718e-02, PNorm = 68.7818, GNorm = 0.7123, lr_0 = 1.9869e-04
Loss = 4.2164e-02, PNorm = 68.7852, GNorm = 0.5688, lr_0 = 1.9856e-04
Loss = 4.3218e-02, PNorm = 68.7883, GNorm = 0.4526, lr_0 = 1.9842e-04
Loss = 5.0204e-02, PNorm = 68.7918, GNorm = 0.5292, lr_0 = 1.9829e-04
Loss = 4.0696e-02, PNorm = 68.7959, GNorm = 0.4304, lr_0 = 1.9815e-04
Loss = 4.9325e-02, PNorm = 68.8006, GNorm = 0.3835, lr_0 = 1.9801e-04
Loss = 3.8446e-02, PNorm = 68.8044, GNorm = 0.4100, lr_0 = 1.9788e-04
Loss = 5.2390e-02, PNorm = 68.8059, GNorm = 0.7240, lr_0 = 1.9774e-04
Loss = 4.7811e-02, PNorm = 68.8078, GNorm = 0.4410, lr_0 = 1.9761e-04
Loss = 4.8433e-02, PNorm = 68.8113, GNorm = 0.4589, lr_0 = 1.9747e-04
Loss = 4.1606e-02, PNorm = 68.8131, GNorm = 0.4038, lr_0 = 1.9734e-04
Loss = 4.8540e-02, PNorm = 68.8157, GNorm = 0.5122, lr_0 = 1.9720e-04
Loss = 5.0057e-02, PNorm = 68.8200, GNorm = 0.3915, lr_0 = 1.9707e-04
Loss = 4.2319e-02, PNorm = 68.8227, GNorm = 0.4531, lr_0 = 1.9693e-04
Loss = 4.1024e-02, PNorm = 68.8257, GNorm = 0.4043, lr_0 = 1.9680e-04
Loss = 4.4526e-02, PNorm = 68.8275, GNorm = 0.5379, lr_0 = 1.9666e-04
Loss = 4.4954e-02, PNorm = 68.8307, GNorm = 0.8059, lr_0 = 1.9653e-04
Loss = 4.5144e-02, PNorm = 68.8348, GNorm = 0.5045, lr_0 = 1.9639e-04
Loss = 4.9106e-02, PNorm = 68.8395, GNorm = 0.5831, lr_0 = 1.9626e-04
Loss = 4.2887e-02, PNorm = 68.8417, GNorm = 0.5805, lr_0 = 1.9612e-04
Loss = 4.5853e-02, PNorm = 68.8438, GNorm = 0.5271, lr_0 = 1.9599e-04
Loss = 5.4084e-02, PNorm = 68.8482, GNorm = 0.4457, lr_0 = 1.9585e-04
Loss = 4.3746e-02, PNorm = 68.8508, GNorm = 0.5803, lr_0 = 1.9572e-04
Loss = 5.1936e-02, PNorm = 68.8530, GNorm = 0.7513, lr_0 = 1.9559e-04
Loss = 4.4180e-02, PNorm = 68.8549, GNorm = 0.5812, lr_0 = 1.9545e-04
Loss = 5.1216e-02, PNorm = 68.8574, GNorm = 0.3781, lr_0 = 1.9532e-04
Loss = 4.3690e-02, PNorm = 68.8587, GNorm = 0.6796, lr_0 = 1.9518e-04
Loss = 4.4079e-02, PNorm = 68.8623, GNorm = 0.5749, lr_0 = 1.9505e-04
Loss = 4.2948e-02, PNorm = 68.8663, GNorm = 0.4817, lr_0 = 1.9492e-04
Loss = 5.0855e-02, PNorm = 68.8684, GNorm = 0.5668, lr_0 = 1.9478e-04
Loss = 4.8726e-02, PNorm = 68.8706, GNorm = 0.6378, lr_0 = 1.9465e-04
Loss = 4.4646e-02, PNorm = 68.8729, GNorm = 0.3897, lr_0 = 1.9452e-04
Loss = 4.1003e-02, PNorm = 68.8752, GNorm = 0.5810, lr_0 = 1.9438e-04
Loss = 4.7124e-02, PNorm = 68.8791, GNorm = 0.5752, lr_0 = 1.9425e-04
Loss = 4.1402e-02, PNorm = 68.8824, GNorm = 0.4384, lr_0 = 1.9412e-04
Loss = 5.3498e-02, PNorm = 68.8859, GNorm = 0.5562, lr_0 = 1.9398e-04
Loss = 5.3242e-02, PNorm = 68.8885, GNorm = 0.6761, lr_0 = 1.9385e-04
Loss = 5.1417e-02, PNorm = 68.8917, GNorm = 0.5308, lr_0 = 1.9372e-04
Loss = 4.0690e-02, PNorm = 68.8964, GNorm = 0.5345, lr_0 = 1.9359e-04
Loss = 5.0183e-02, PNorm = 68.9002, GNorm = 0.3524, lr_0 = 1.9345e-04
Loss = 4.9837e-02, PNorm = 68.9023, GNorm = 0.5922, lr_0 = 1.9332e-04
Loss = 4.4992e-02, PNorm = 68.9045, GNorm = 0.5571, lr_0 = 1.9319e-04
Loss = 4.7600e-02, PNorm = 68.9071, GNorm = 0.6594, lr_0 = 1.9306e-04
Validation mae = 0.391105
Epoch 22
Loss = 3.7308e-02, PNorm = 68.9094, GNorm = 0.4823, lr_0 = 1.9292e-04
Loss = 4.1339e-02, PNorm = 68.9123, GNorm = 0.5848, lr_0 = 1.9279e-04
Loss = 3.8995e-02, PNorm = 68.9148, GNorm = 0.5763, lr_0 = 1.9266e-04
Loss = 3.6771e-02, PNorm = 68.9169, GNorm = 0.5057, lr_0 = 1.9253e-04
Loss = 4.2366e-02, PNorm = 68.9192, GNorm = 0.5583, lr_0 = 1.9240e-04
Loss = 3.7284e-02, PNorm = 68.9233, GNorm = 0.4190, lr_0 = 1.9226e-04
Loss = 3.9359e-02, PNorm = 68.9267, GNorm = 0.4664, lr_0 = 1.9213e-04
Loss = 3.8477e-02, PNorm = 68.9308, GNorm = 0.5210, lr_0 = 1.9200e-04
Loss = 3.8084e-02, PNorm = 68.9341, GNorm = 0.3745, lr_0 = 1.9187e-04
Loss = 3.9508e-02, PNorm = 68.9365, GNorm = 0.4102, lr_0 = 1.9174e-04
Loss = 3.4289e-02, PNorm = 68.9388, GNorm = 0.4634, lr_0 = 1.9161e-04
Loss = 4.0179e-02, PNorm = 68.9406, GNorm = 0.5498, lr_0 = 1.9148e-04
Loss = 3.7787e-02, PNorm = 68.9428, GNorm = 0.3955, lr_0 = 1.9134e-04
Loss = 4.0079e-02, PNorm = 68.9444, GNorm = 0.3987, lr_0 = 1.9121e-04
Loss = 3.3004e-02, PNorm = 68.9456, GNorm = 0.3935, lr_0 = 1.9108e-04
Loss = 3.6756e-02, PNorm = 68.9479, GNorm = 0.5500, lr_0 = 1.9095e-04
Loss = 3.4790e-02, PNorm = 68.9502, GNorm = 0.4356, lr_0 = 1.9082e-04
Loss = 4.2859e-02, PNorm = 68.9538, GNorm = 0.4922, lr_0 = 1.9069e-04
Loss = 3.7056e-02, PNorm = 68.9566, GNorm = 0.3731, lr_0 = 1.9056e-04
Loss = 4.3828e-02, PNorm = 68.9597, GNorm = 0.6130, lr_0 = 1.9043e-04
Loss = 4.8575e-02, PNorm = 68.9630, GNorm = 0.5053, lr_0 = 1.9030e-04
Loss = 3.9861e-02, PNorm = 68.9655, GNorm = 0.6106, lr_0 = 1.9017e-04
Loss = 4.6246e-02, PNorm = 68.9693, GNorm = 0.6865, lr_0 = 1.9004e-04
Loss = 3.9445e-02, PNorm = 68.9720, GNorm = 0.5667, lr_0 = 1.8991e-04
Loss = 4.2388e-02, PNorm = 68.9744, GNorm = 0.8633, lr_0 = 1.8978e-04
Loss = 3.8592e-02, PNorm = 68.9774, GNorm = 0.3500, lr_0 = 1.8965e-04
Loss = 4.5404e-02, PNorm = 68.9817, GNorm = 0.4733, lr_0 = 1.8952e-04
Loss = 4.3790e-02, PNorm = 68.9855, GNorm = 0.6538, lr_0 = 1.8939e-04
Loss = 3.5433e-02, PNorm = 68.9864, GNorm = 0.4732, lr_0 = 1.8926e-04
Loss = 4.4753e-02, PNorm = 68.9881, GNorm = 0.4152, lr_0 = 1.8913e-04
Loss = 4.4706e-02, PNorm = 68.9924, GNorm = 0.5786, lr_0 = 1.8900e-04
Loss = 4.1706e-02, PNorm = 68.9964, GNorm = 0.4983, lr_0 = 1.8887e-04
Loss = 4.0450e-02, PNorm = 68.9989, GNorm = 0.6930, lr_0 = 1.8874e-04
Loss = 3.9549e-02, PNorm = 69.0015, GNorm = 0.4087, lr_0 = 1.8861e-04
Loss = 3.7246e-02, PNorm = 69.0039, GNorm = 0.5834, lr_0 = 1.8848e-04
Loss = 4.2719e-02, PNorm = 69.0068, GNorm = 0.6312, lr_0 = 1.8835e-04
Loss = 3.9728e-02, PNorm = 69.0102, GNorm = 0.4657, lr_0 = 1.8822e-04
Loss = 3.6485e-02, PNorm = 69.0121, GNorm = 0.4417, lr_0 = 1.8809e-04
Loss = 3.9576e-02, PNorm = 69.0148, GNorm = 0.6261, lr_0 = 1.8797e-04
Loss = 4.3534e-02, PNorm = 69.0192, GNorm = 0.5820, lr_0 = 1.8784e-04
Loss = 4.0662e-02, PNorm = 69.0229, GNorm = 0.6948, lr_0 = 1.8771e-04
Loss = 4.1911e-02, PNorm = 69.0259, GNorm = 0.4506, lr_0 = 1.8758e-04
Loss = 4.2943e-02, PNorm = 69.0296, GNorm = 0.5296, lr_0 = 1.8745e-04
Loss = 4.2373e-02, PNorm = 69.0337, GNorm = 0.3454, lr_0 = 1.8732e-04
Loss = 3.7663e-02, PNorm = 69.0373, GNorm = 0.5203, lr_0 = 1.8719e-04
Loss = 4.5395e-02, PNorm = 69.0423, GNorm = 0.5234, lr_0 = 1.8707e-04
Loss = 4.4551e-02, PNorm = 69.0459, GNorm = 0.4560, lr_0 = 1.8694e-04
Loss = 4.7177e-02, PNorm = 69.0472, GNorm = 0.5606, lr_0 = 1.8681e-04
Loss = 4.3596e-02, PNorm = 69.0502, GNorm = 0.5770, lr_0 = 1.8668e-04
Loss = 3.9826e-02, PNorm = 69.0536, GNorm = 1.0224, lr_0 = 1.8655e-04
Loss = 4.2658e-02, PNorm = 69.0578, GNorm = 0.4524, lr_0 = 1.8643e-04
Loss = 4.4291e-02, PNorm = 69.0623, GNorm = 0.8355, lr_0 = 1.8630e-04
Loss = 4.5711e-02, PNorm = 69.0649, GNorm = 0.6041, lr_0 = 1.8617e-04
Loss = 4.7317e-02, PNorm = 69.0661, GNorm = 0.8697, lr_0 = 1.8604e-04
Loss = 4.6491e-02, PNorm = 69.0687, GNorm = 0.7592, lr_0 = 1.8592e-04
Loss = 4.4656e-02, PNorm = 69.0715, GNorm = 0.6061, lr_0 = 1.8579e-04
Loss = 4.3332e-02, PNorm = 69.0731, GNorm = 0.4909, lr_0 = 1.8566e-04
Loss = 4.3287e-02, PNorm = 69.0759, GNorm = 0.4303, lr_0 = 1.8553e-04
Loss = 3.8684e-02, PNorm = 69.0783, GNorm = 0.4879, lr_0 = 1.8541e-04
Loss = 3.5803e-02, PNorm = 69.0805, GNorm = 0.6285, lr_0 = 1.8528e-04
Loss = 4.4153e-02, PNorm = 69.0826, GNorm = 0.4863, lr_0 = 1.8515e-04
Loss = 3.9513e-02, PNorm = 69.0855, GNorm = 0.6304, lr_0 = 1.8503e-04
Loss = 4.9799e-02, PNorm = 69.0889, GNorm = 0.5068, lr_0 = 1.8490e-04
Loss = 4.6274e-02, PNorm = 69.0915, GNorm = 0.5849, lr_0 = 1.8477e-04
Loss = 4.7000e-02, PNorm = 69.0931, GNorm = 0.5551, lr_0 = 1.8465e-04
Loss = 4.9114e-02, PNorm = 69.0963, GNorm = 0.4406, lr_0 = 1.8452e-04
Loss = 4.3048e-02, PNorm = 69.1000, GNorm = 0.7049, lr_0 = 1.8439e-04
Loss = 3.9731e-02, PNorm = 69.1030, GNorm = 0.5320, lr_0 = 1.8427e-04
Loss = 4.4417e-02, PNorm = 69.1065, GNorm = 0.7559, lr_0 = 1.8414e-04
Loss = 4.3129e-02, PNorm = 69.1089, GNorm = 0.4310, lr_0 = 1.8401e-04
Loss = 4.1130e-02, PNorm = 69.1125, GNorm = 0.3627, lr_0 = 1.8389e-04
Loss = 3.8935e-02, PNorm = 69.1159, GNorm = 0.4754, lr_0 = 1.8376e-04
Loss = 3.6217e-02, PNorm = 69.1193, GNorm = 0.3740, lr_0 = 1.8364e-04
Loss = 4.9654e-02, PNorm = 69.1219, GNorm = 0.4317, lr_0 = 1.8351e-04
Loss = 3.8560e-02, PNorm = 69.1250, GNorm = 0.3823, lr_0 = 1.8338e-04
Loss = 4.0096e-02, PNorm = 69.1281, GNorm = 0.5636, lr_0 = 1.8326e-04
Loss = 4.6304e-02, PNorm = 69.1314, GNorm = 0.5001, lr_0 = 1.8313e-04
Loss = 4.1471e-02, PNorm = 69.1354, GNorm = 0.7520, lr_0 = 1.8301e-04
Loss = 4.2790e-02, PNorm = 69.1378, GNorm = 0.5117, lr_0 = 1.8288e-04
Loss = 4.1710e-02, PNorm = 69.1404, GNorm = 0.6514, lr_0 = 1.8276e-04
Loss = 4.5974e-02, PNorm = 69.1431, GNorm = 0.5037, lr_0 = 1.8263e-04
Loss = 4.4204e-02, PNorm = 69.1470, GNorm = 0.6325, lr_0 = 1.8251e-04
Loss = 4.8355e-02, PNorm = 69.1503, GNorm = 0.8248, lr_0 = 1.8238e-04
Loss = 3.9818e-02, PNorm = 69.1544, GNorm = 0.5698, lr_0 = 1.8226e-04
Loss = 4.6616e-02, PNorm = 69.1573, GNorm = 0.7366, lr_0 = 1.8213e-04
Loss = 4.5762e-02, PNorm = 69.1591, GNorm = 1.1030, lr_0 = 1.8201e-04
Loss = 4.9379e-02, PNorm = 69.1612, GNorm = 0.7134, lr_0 = 1.8188e-04
Loss = 4.4997e-02, PNorm = 69.1639, GNorm = 0.6542, lr_0 = 1.8176e-04
Loss = 4.3273e-02, PNorm = 69.1659, GNorm = 0.6181, lr_0 = 1.8163e-04
Loss = 5.0068e-02, PNorm = 69.1691, GNorm = 0.3933, lr_0 = 1.8151e-04
Loss = 4.8863e-02, PNorm = 69.1732, GNorm = 0.4292, lr_0 = 1.8138e-04
Loss = 4.5058e-02, PNorm = 69.1760, GNorm = 0.5091, lr_0 = 1.8126e-04
Loss = 4.4308e-02, PNorm = 69.1782, GNorm = 0.7226, lr_0 = 1.8114e-04
Loss = 4.4848e-02, PNorm = 69.1806, GNorm = 0.5855, lr_0 = 1.8101e-04
Loss = 4.7429e-02, PNorm = 69.1844, GNorm = 0.5397, lr_0 = 1.8089e-04
Loss = 4.5790e-02, PNorm = 69.1871, GNorm = 0.5224, lr_0 = 1.8076e-04
Loss = 3.9245e-02, PNorm = 69.1899, GNorm = 0.3808, lr_0 = 1.8064e-04
Loss = 4.5007e-02, PNorm = 69.1923, GNorm = 0.5139, lr_0 = 1.8052e-04
Loss = 3.8962e-02, PNorm = 69.1931, GNorm = 0.6612, lr_0 = 1.8039e-04
Loss = 4.7792e-02, PNorm = 69.1952, GNorm = 0.5161, lr_0 = 1.8027e-04
Loss = 5.2546e-02, PNorm = 69.1996, GNorm = 0.6486, lr_0 = 1.8015e-04
Loss = 4.8383e-02, PNorm = 69.2022, GNorm = 0.4479, lr_0 = 1.8002e-04
Loss = 4.1352e-02, PNorm = 69.2044, GNorm = 0.5074, lr_0 = 1.7990e-04
Loss = 4.3473e-02, PNorm = 69.2065, GNorm = 0.5722, lr_0 = 1.7978e-04
Loss = 4.1325e-02, PNorm = 69.2078, GNorm = 0.5764, lr_0 = 1.7965e-04
Loss = 4.2400e-02, PNorm = 69.2101, GNorm = 0.5355, lr_0 = 1.7953e-04
Loss = 4.4972e-02, PNorm = 69.2135, GNorm = 0.4787, lr_0 = 1.7941e-04
Loss = 4.4195e-02, PNorm = 69.2171, GNorm = 0.6686, lr_0 = 1.7928e-04
Loss = 4.9505e-02, PNorm = 69.2214, GNorm = 0.6374, lr_0 = 1.7916e-04
Loss = 5.2930e-02, PNorm = 69.2246, GNorm = 0.8713, lr_0 = 1.7904e-04
Loss = 4.2408e-02, PNorm = 69.2268, GNorm = 0.5317, lr_0 = 1.7892e-04
Loss = 3.9314e-02, PNorm = 69.2287, GNorm = 0.5842, lr_0 = 1.7879e-04
Loss = 4.0516e-02, PNorm = 69.2305, GNorm = 0.8796, lr_0 = 1.7867e-04
Loss = 4.7159e-02, PNorm = 69.2332, GNorm = 0.5735, lr_0 = 1.7855e-04
Loss = 4.9460e-02, PNorm = 69.2350, GNorm = 0.5132, lr_0 = 1.7843e-04
Loss = 3.9722e-02, PNorm = 69.2366, GNorm = 0.6299, lr_0 = 1.7830e-04
Loss = 4.0075e-02, PNorm = 69.2385, GNorm = 0.5597, lr_0 = 1.7818e-04
Loss = 4.2376e-02, PNorm = 69.2412, GNorm = 0.4530, lr_0 = 1.7806e-04
Loss = 4.8575e-02, PNorm = 69.2420, GNorm = 0.6931, lr_0 = 1.7794e-04
Loss = 4.1391e-02, PNorm = 69.2448, GNorm = 0.5483, lr_0 = 1.7782e-04
Validation mae = 0.384592
Epoch 23
Loss = 4.7307e-02, PNorm = 69.2471, GNorm = 0.4220, lr_0 = 1.7769e-04
Loss = 3.3379e-02, PNorm = 69.2503, GNorm = 0.3556, lr_0 = 1.7757e-04
Loss = 3.6695e-02, PNorm = 69.2529, GNorm = 0.3716, lr_0 = 1.7745e-04
Loss = 3.3222e-02, PNorm = 69.2551, GNorm = 0.5972, lr_0 = 1.7733e-04
Loss = 3.4962e-02, PNorm = 69.2574, GNorm = 0.3862, lr_0 = 1.7721e-04
Loss = 3.8278e-02, PNorm = 69.2608, GNorm = 0.4163, lr_0 = 1.7709e-04
Loss = 3.2176e-02, PNorm = 69.2636, GNorm = 0.4322, lr_0 = 1.7696e-04
Loss = 3.3134e-02, PNorm = 69.2673, GNorm = 0.4350, lr_0 = 1.7684e-04
Loss = 2.8572e-02, PNorm = 69.2711, GNorm = 0.3577, lr_0 = 1.7672e-04
Loss = 3.8915e-02, PNorm = 69.2746, GNorm = 0.6438, lr_0 = 1.7660e-04
Loss = 3.7436e-02, PNorm = 69.2757, GNorm = 0.4900, lr_0 = 1.7648e-04
Loss = 3.0703e-02, PNorm = 69.2777, GNorm = 0.6488, lr_0 = 1.7636e-04
Loss = 3.4653e-02, PNorm = 69.2814, GNorm = 0.4819, lr_0 = 1.7624e-04
Loss = 3.9199e-02, PNorm = 69.2857, GNorm = 0.5526, lr_0 = 1.7612e-04
Loss = 3.6581e-02, PNorm = 69.2882, GNorm = 0.6352, lr_0 = 1.7600e-04
Loss = 3.8763e-02, PNorm = 69.2901, GNorm = 0.5763, lr_0 = 1.7588e-04
Loss = 3.8590e-02, PNorm = 69.2934, GNorm = 0.5979, lr_0 = 1.7576e-04
Loss = 3.4958e-02, PNorm = 69.2950, GNorm = 0.4800, lr_0 = 1.7564e-04
Loss = 3.6864e-02, PNorm = 69.2967, GNorm = 0.5098, lr_0 = 1.7552e-04
Loss = 3.8920e-02, PNorm = 69.2993, GNorm = 0.5343, lr_0 = 1.7540e-04
Loss = 3.6217e-02, PNorm = 69.3024, GNorm = 0.7537, lr_0 = 1.7528e-04
Loss = 3.3371e-02, PNorm = 69.3062, GNorm = 0.5991, lr_0 = 1.7516e-04
Loss = 3.7922e-02, PNorm = 69.3086, GNorm = 0.3363, lr_0 = 1.7504e-04
Loss = 4.0004e-02, PNorm = 69.3121, GNorm = 0.4810, lr_0 = 1.7492e-04
Loss = 3.1453e-02, PNorm = 69.3137, GNorm = 0.6349, lr_0 = 1.7480e-04
Loss = 3.2073e-02, PNorm = 69.3149, GNorm = 0.4339, lr_0 = 1.7468e-04
Loss = 3.6832e-02, PNorm = 69.3164, GNorm = 0.3795, lr_0 = 1.7456e-04
Loss = 3.5480e-02, PNorm = 69.3190, GNorm = 0.4935, lr_0 = 1.7444e-04
Loss = 3.7855e-02, PNorm = 69.3212, GNorm = 0.6321, lr_0 = 1.7432e-04
Loss = 3.2988e-02, PNorm = 69.3246, GNorm = 0.4743, lr_0 = 1.7420e-04
Loss = 4.0546e-02, PNorm = 69.3280, GNorm = 0.6136, lr_0 = 1.7408e-04
Loss = 3.7314e-02, PNorm = 69.3316, GNorm = 0.4199, lr_0 = 1.7396e-04
Loss = 3.9777e-02, PNorm = 69.3348, GNorm = 0.4494, lr_0 = 1.7384e-04
Loss = 4.1774e-02, PNorm = 69.3381, GNorm = 0.5170, lr_0 = 1.7372e-04
Loss = 3.7881e-02, PNorm = 69.3405, GNorm = 0.5310, lr_0 = 1.7360e-04
Loss = 3.9017e-02, PNorm = 69.3439, GNorm = 0.3604, lr_0 = 1.7348e-04
Loss = 4.1821e-02, PNorm = 69.3459, GNorm = 0.4643, lr_0 = 1.7336e-04
Loss = 3.8429e-02, PNorm = 69.3483, GNorm = 0.5754, lr_0 = 1.7325e-04
Loss = 4.4821e-02, PNorm = 69.3529, GNorm = 0.4311, lr_0 = 1.7313e-04
Loss = 4.1945e-02, PNorm = 69.3567, GNorm = 0.4494, lr_0 = 1.7301e-04
Loss = 3.6374e-02, PNorm = 69.3593, GNorm = 0.4875, lr_0 = 1.7289e-04
Loss = 3.7220e-02, PNorm = 69.3616, GNorm = 0.4487, lr_0 = 1.7277e-04
Loss = 4.1115e-02, PNorm = 69.3635, GNorm = 0.4402, lr_0 = 1.7265e-04
Loss = 4.6812e-02, PNorm = 69.3671, GNorm = 0.4378, lr_0 = 1.7253e-04
Loss = 4.1669e-02, PNorm = 69.3700, GNorm = 0.6056, lr_0 = 1.7242e-04
Loss = 4.8649e-02, PNorm = 69.3724, GNorm = 0.5101, lr_0 = 1.7230e-04
Loss = 4.0267e-02, PNorm = 69.3750, GNorm = 0.5642, lr_0 = 1.7218e-04
Loss = 4.5389e-02, PNorm = 69.3800, GNorm = 0.5164, lr_0 = 1.7206e-04
Loss = 4.2013e-02, PNorm = 69.3852, GNorm = 0.5234, lr_0 = 1.7194e-04
Loss = 3.7053e-02, PNorm = 69.3889, GNorm = 1.0381, lr_0 = 1.7183e-04
Loss = 3.7588e-02, PNorm = 69.3914, GNorm = 0.4070, lr_0 = 1.7171e-04
Loss = 4.3250e-02, PNorm = 69.3943, GNorm = 0.5055, lr_0 = 1.7159e-04
Loss = 4.1931e-02, PNorm = 69.3992, GNorm = 0.5365, lr_0 = 1.7147e-04
Loss = 3.5306e-02, PNorm = 69.4028, GNorm = 0.5196, lr_0 = 1.7136e-04
Loss = 4.1858e-02, PNorm = 69.4057, GNorm = 0.6173, lr_0 = 1.7124e-04
Loss = 3.7193e-02, PNorm = 69.4071, GNorm = 0.5177, lr_0 = 1.7112e-04
Loss = 3.5420e-02, PNorm = 69.4081, GNorm = 0.3901, lr_0 = 1.7100e-04
Loss = 4.5403e-02, PNorm = 69.4093, GNorm = 0.5848, lr_0 = 1.7089e-04
Loss = 3.8239e-02, PNorm = 69.4126, GNorm = 0.4287, lr_0 = 1.7077e-04
Loss = 4.1161e-02, PNorm = 69.4153, GNorm = 0.7615, lr_0 = 1.7065e-04
Loss = 4.2337e-02, PNorm = 69.4170, GNorm = 0.8805, lr_0 = 1.7054e-04
Loss = 4.0767e-02, PNorm = 69.4192, GNorm = 0.6684, lr_0 = 1.7042e-04
Loss = 4.2592e-02, PNorm = 69.4218, GNorm = 0.6074, lr_0 = 1.7030e-04
Loss = 3.8254e-02, PNorm = 69.4251, GNorm = 0.5779, lr_0 = 1.7019e-04
Loss = 3.9065e-02, PNorm = 69.4275, GNorm = 0.5052, lr_0 = 1.7007e-04
Loss = 3.9009e-02, PNorm = 69.4298, GNorm = 0.3905, lr_0 = 1.6995e-04
Loss = 4.5802e-02, PNorm = 69.4316, GNorm = 0.5562, lr_0 = 1.6984e-04
Loss = 4.4086e-02, PNorm = 69.4336, GNorm = 0.4067, lr_0 = 1.6972e-04
Loss = 3.4888e-02, PNorm = 69.4351, GNorm = 0.4537, lr_0 = 1.6960e-04
Loss = 3.7643e-02, PNorm = 69.4360, GNorm = 0.3595, lr_0 = 1.6949e-04
Loss = 4.9583e-02, PNorm = 69.4376, GNorm = 0.6908, lr_0 = 1.6937e-04
Loss = 4.7921e-02, PNorm = 69.4414, GNorm = 0.5858, lr_0 = 1.6926e-04
Loss = 4.0966e-02, PNorm = 69.4442, GNorm = 0.5332, lr_0 = 1.6914e-04
Loss = 4.4431e-02, PNorm = 69.4475, GNorm = 0.5567, lr_0 = 1.6902e-04
Loss = 4.4510e-02, PNorm = 69.4501, GNorm = 0.4417, lr_0 = 1.6891e-04
Loss = 3.7931e-02, PNorm = 69.4545, GNorm = 0.4481, lr_0 = 1.6879e-04
Loss = 4.1181e-02, PNorm = 69.4576, GNorm = 0.4395, lr_0 = 1.6868e-04
Loss = 4.1091e-02, PNorm = 69.4609, GNorm = 0.6141, lr_0 = 1.6856e-04
Loss = 4.2794e-02, PNorm = 69.4645, GNorm = 0.5264, lr_0 = 1.6845e-04
Loss = 3.9073e-02, PNorm = 69.4682, GNorm = 0.5047, lr_0 = 1.6833e-04
Loss = 4.2198e-02, PNorm = 69.4694, GNorm = 0.4261, lr_0 = 1.6821e-04
Loss = 4.0775e-02, PNorm = 69.4712, GNorm = 0.6492, lr_0 = 1.6810e-04
Loss = 4.6236e-02, PNorm = 69.4719, GNorm = 0.6187, lr_0 = 1.6798e-04
Loss = 3.7684e-02, PNorm = 69.4748, GNorm = 0.4363, lr_0 = 1.6787e-04
Loss = 4.6615e-02, PNorm = 69.4767, GNorm = 0.5486, lr_0 = 1.6775e-04
Loss = 3.7840e-02, PNorm = 69.4795, GNorm = 0.4597, lr_0 = 1.6764e-04
Loss = 3.8834e-02, PNorm = 69.4833, GNorm = 0.4126, lr_0 = 1.6752e-04
Loss = 3.5876e-02, PNorm = 69.4860, GNorm = 0.4919, lr_0 = 1.6741e-04
Loss = 4.3791e-02, PNorm = 69.4882, GNorm = 1.2781, lr_0 = 1.6729e-04
Loss = 4.0249e-02, PNorm = 69.4915, GNorm = 0.5091, lr_0 = 1.6718e-04
Loss = 4.1347e-02, PNorm = 69.4949, GNorm = 0.7245, lr_0 = 1.6707e-04
Loss = 3.5888e-02, PNorm = 69.4970, GNorm = 0.4183, lr_0 = 1.6695e-04
Loss = 4.7055e-02, PNorm = 69.4989, GNorm = 0.7479, lr_0 = 1.6684e-04
Loss = 4.7758e-02, PNorm = 69.5000, GNorm = 0.7022, lr_0 = 1.6672e-04
Loss = 4.0434e-02, PNorm = 69.5019, GNorm = 0.5258, lr_0 = 1.6661e-04
Loss = 3.8605e-02, PNorm = 69.5036, GNorm = 0.4279, lr_0 = 1.6649e-04
Loss = 3.7337e-02, PNorm = 69.5057, GNorm = 0.5044, lr_0 = 1.6638e-04
Loss = 4.4277e-02, PNorm = 69.5079, GNorm = 0.6338, lr_0 = 1.6627e-04
Loss = 4.5814e-02, PNorm = 69.5107, GNorm = 0.5146, lr_0 = 1.6615e-04
Loss = 3.9596e-02, PNorm = 69.5138, GNorm = 0.4322, lr_0 = 1.6604e-04
Loss = 4.7266e-02, PNorm = 69.5173, GNorm = 0.7650, lr_0 = 1.6592e-04
Loss = 4.6586e-02, PNorm = 69.5209, GNorm = 0.5281, lr_0 = 1.6581e-04
Loss = 4.6684e-02, PNorm = 69.5232, GNorm = 0.5780, lr_0 = 1.6570e-04
Loss = 4.6400e-02, PNorm = 69.5254, GNorm = 0.4254, lr_0 = 1.6558e-04
Loss = 4.1908e-02, PNorm = 69.5283, GNorm = 0.4916, lr_0 = 1.6547e-04
Loss = 4.2043e-02, PNorm = 69.5305, GNorm = 0.7337, lr_0 = 1.6536e-04
Loss = 4.4471e-02, PNorm = 69.5323, GNorm = 0.5593, lr_0 = 1.6524e-04
Loss = 4.6829e-02, PNorm = 69.5356, GNorm = 0.4376, lr_0 = 1.6513e-04
Loss = 4.6716e-02, PNorm = 69.5397, GNorm = 0.5043, lr_0 = 1.6502e-04
Loss = 3.7732e-02, PNorm = 69.5429, GNorm = 0.5021, lr_0 = 1.6490e-04
Loss = 3.9777e-02, PNorm = 69.5449, GNorm = 0.4339, lr_0 = 1.6479e-04
Loss = 4.4054e-02, PNorm = 69.5464, GNorm = 0.4653, lr_0 = 1.6468e-04
Loss = 4.0087e-02, PNorm = 69.5488, GNorm = 0.5851, lr_0 = 1.6457e-04
Loss = 3.7939e-02, PNorm = 69.5509, GNorm = 0.5189, lr_0 = 1.6445e-04
Loss = 3.2060e-02, PNorm = 69.5525, GNorm = 0.4725, lr_0 = 1.6434e-04
Loss = 4.8125e-02, PNorm = 69.5541, GNorm = 0.6245, lr_0 = 1.6423e-04
Loss = 4.1296e-02, PNorm = 69.5557, GNorm = 0.5977, lr_0 = 1.6412e-04
Loss = 4.5069e-02, PNorm = 69.5573, GNorm = 0.6306, lr_0 = 1.6400e-04
Loss = 3.9285e-02, PNorm = 69.5592, GNorm = 0.4781, lr_0 = 1.6389e-04
Loss = 4.7407e-02, PNorm = 69.5619, GNorm = 0.4636, lr_0 = 1.6378e-04
Validation mae = 0.387993
Epoch 24
Loss = 3.2292e-02, PNorm = 69.5640, GNorm = 0.4010, lr_0 = 1.6367e-04
Loss = 3.5750e-02, PNorm = 69.5661, GNorm = 0.6588, lr_0 = 1.6355e-04
Loss = 3.3259e-02, PNorm = 69.5677, GNorm = 0.3270, lr_0 = 1.6344e-04
Loss = 3.9843e-02, PNorm = 69.5689, GNorm = 0.7114, lr_0 = 1.6333e-04
Loss = 3.5318e-02, PNorm = 69.5711, GNorm = 0.4192, lr_0 = 1.6322e-04
Loss = 3.5215e-02, PNorm = 69.5742, GNorm = 0.8835, lr_0 = 1.6311e-04
Loss = 3.8241e-02, PNorm = 69.5763, GNorm = 0.5524, lr_0 = 1.6299e-04
Loss = 3.3703e-02, PNorm = 69.5796, GNorm = 0.5153, lr_0 = 1.6288e-04
Loss = 3.4452e-02, PNorm = 69.5825, GNorm = 0.4838, lr_0 = 1.6277e-04
Loss = 4.2331e-02, PNorm = 69.5850, GNorm = 0.4397, lr_0 = 1.6266e-04
Loss = 3.4685e-02, PNorm = 69.5868, GNorm = 0.4467, lr_0 = 1.6255e-04
Loss = 3.4544e-02, PNorm = 69.5889, GNorm = 0.4654, lr_0 = 1.6244e-04
Loss = 3.4003e-02, PNorm = 69.5925, GNorm = 0.4897, lr_0 = 1.6233e-04
Loss = 3.8533e-02, PNorm = 69.5958, GNorm = 0.6070, lr_0 = 1.6221e-04
Loss = 3.6284e-02, PNorm = 69.5981, GNorm = 0.4504, lr_0 = 1.6210e-04
Loss = 2.6300e-02, PNorm = 69.6007, GNorm = 0.4136, lr_0 = 1.6199e-04
Loss = 4.3151e-02, PNorm = 69.6036, GNorm = 0.5689, lr_0 = 1.6188e-04
Loss = 3.0884e-02, PNorm = 69.6052, GNorm = 0.3536, lr_0 = 1.6177e-04
Loss = 3.1064e-02, PNorm = 69.6074, GNorm = 0.6996, lr_0 = 1.6166e-04
Loss = 3.4408e-02, PNorm = 69.6098, GNorm = 0.4554, lr_0 = 1.6155e-04
Loss = 3.6741e-02, PNorm = 69.6122, GNorm = 0.4935, lr_0 = 1.6144e-04
Loss = 3.5309e-02, PNorm = 69.6144, GNorm = 0.4619, lr_0 = 1.6133e-04
Loss = 3.7866e-02, PNorm = 69.6170, GNorm = 0.5721, lr_0 = 1.6122e-04
Loss = 3.4742e-02, PNorm = 69.6202, GNorm = 0.3358, lr_0 = 1.6111e-04
Loss = 3.1746e-02, PNorm = 69.6233, GNorm = 0.3972, lr_0 = 1.6100e-04
Loss = 4.1973e-02, PNorm = 69.6248, GNorm = 0.5257, lr_0 = 1.6089e-04
Loss = 3.9278e-02, PNorm = 69.6265, GNorm = 0.5243, lr_0 = 1.6078e-04
Loss = 3.1262e-02, PNorm = 69.6294, GNorm = 0.7359, lr_0 = 1.6067e-04
Loss = 3.7790e-02, PNorm = 69.6329, GNorm = 0.4078, lr_0 = 1.6056e-04
Loss = 3.4687e-02, PNorm = 69.6354, GNorm = 0.7030, lr_0 = 1.6045e-04
Loss = 3.6252e-02, PNorm = 69.6363, GNorm = 0.4985, lr_0 = 1.6034e-04
Loss = 3.3838e-02, PNorm = 69.6387, GNorm = 0.5890, lr_0 = 1.6023e-04
Loss = 3.2959e-02, PNorm = 69.6417, GNorm = 0.4018, lr_0 = 1.6012e-04
Loss = 3.2878e-02, PNorm = 69.6444, GNorm = 0.4406, lr_0 = 1.6001e-04
Loss = 3.3779e-02, PNorm = 69.6467, GNorm = 0.7040, lr_0 = 1.5990e-04
Loss = 3.8394e-02, PNorm = 69.6494, GNorm = 0.6571, lr_0 = 1.5979e-04
Loss = 3.4689e-02, PNorm = 69.6506, GNorm = 0.6806, lr_0 = 1.5968e-04
Loss = 3.9378e-02, PNorm = 69.6531, GNorm = 0.5705, lr_0 = 1.5957e-04
Loss = 3.1778e-02, PNorm = 69.6560, GNorm = 0.5600, lr_0 = 1.5946e-04
Loss = 3.4960e-02, PNorm = 69.6575, GNorm = 0.6056, lr_0 = 1.5935e-04
Loss = 4.0140e-02, PNorm = 69.6601, GNorm = 0.4111, lr_0 = 1.5924e-04
Loss = 3.9400e-02, PNorm = 69.6629, GNorm = 0.3550, lr_0 = 1.5913e-04
Loss = 3.8515e-02, PNorm = 69.6655, GNorm = 0.6030, lr_0 = 1.5902e-04
Loss = 3.4711e-02, PNorm = 69.6683, GNorm = 0.4866, lr_0 = 1.5891e-04
Loss = 4.0816e-02, PNorm = 69.6719, GNorm = 0.5725, lr_0 = 1.5880e-04
Loss = 4.0835e-02, PNorm = 69.6743, GNorm = 0.5121, lr_0 = 1.5870e-04
Loss = 4.1345e-02, PNorm = 69.6758, GNorm = 0.9416, lr_0 = 1.5859e-04
Loss = 3.5108e-02, PNorm = 69.6775, GNorm = 0.5310, lr_0 = 1.5848e-04
Loss = 3.8833e-02, PNorm = 69.6798, GNorm = 0.8490, lr_0 = 1.5837e-04
Loss = 3.3239e-02, PNorm = 69.6812, GNorm = 0.5083, lr_0 = 1.5826e-04
Loss = 4.0114e-02, PNorm = 69.6830, GNorm = 0.4801, lr_0 = 1.5815e-04
Loss = 4.3205e-02, PNorm = 69.6853, GNorm = 0.4281, lr_0 = 1.5804e-04
Loss = 3.7553e-02, PNorm = 69.6879, GNorm = 0.3709, lr_0 = 1.5794e-04
Loss = 3.2388e-02, PNorm = 69.6898, GNorm = 0.4347, lr_0 = 1.5783e-04
Loss = 3.7051e-02, PNorm = 69.6925, GNorm = 0.4257, lr_0 = 1.5772e-04
Loss = 4.1118e-02, PNorm = 69.6948, GNorm = 0.3520, lr_0 = 1.5761e-04
Loss = 3.5488e-02, PNorm = 69.6967, GNorm = 0.4318, lr_0 = 1.5750e-04
Loss = 3.8374e-02, PNorm = 69.7002, GNorm = 0.5381, lr_0 = 1.5740e-04
Loss = 4.2113e-02, PNorm = 69.7035, GNorm = 0.4735, lr_0 = 1.5729e-04
Loss = 3.4950e-02, PNorm = 69.7066, GNorm = 0.4633, lr_0 = 1.5718e-04
Loss = 3.7346e-02, PNorm = 69.7077, GNorm = 0.4960, lr_0 = 1.5707e-04
Loss = 3.6629e-02, PNorm = 69.7090, GNorm = 0.5583, lr_0 = 1.5697e-04
Loss = 4.2245e-02, PNorm = 69.7123, GNorm = 0.4503, lr_0 = 1.5686e-04
Loss = 4.1538e-02, PNorm = 69.7151, GNorm = 0.4544, lr_0 = 1.5675e-04
Loss = 3.3027e-02, PNorm = 69.7172, GNorm = 0.6689, lr_0 = 1.5664e-04
Loss = 3.7745e-02, PNorm = 69.7186, GNorm = 0.4814, lr_0 = 1.5654e-04
Loss = 4.0341e-02, PNorm = 69.7206, GNorm = 0.5333, lr_0 = 1.5643e-04
Loss = 3.8407e-02, PNorm = 69.7218, GNorm = 0.4991, lr_0 = 1.5632e-04
Loss = 4.0884e-02, PNorm = 69.7241, GNorm = 0.6430, lr_0 = 1.5621e-04
Loss = 3.6086e-02, PNorm = 69.7284, GNorm = 0.3969, lr_0 = 1.5611e-04
Loss = 3.8029e-02, PNorm = 69.7310, GNorm = 0.5014, lr_0 = 1.5600e-04
Loss = 3.5294e-02, PNorm = 69.7347, GNorm = 0.6078, lr_0 = 1.5589e-04
Loss = 3.8460e-02, PNorm = 69.7372, GNorm = 0.4577, lr_0 = 1.5579e-04
Loss = 4.0647e-02, PNorm = 69.7394, GNorm = 0.4744, lr_0 = 1.5568e-04
Loss = 3.6576e-02, PNorm = 69.7418, GNorm = 0.4635, lr_0 = 1.5557e-04
Loss = 4.6910e-02, PNorm = 69.7430, GNorm = 0.4849, lr_0 = 1.5547e-04
Loss = 3.8839e-02, PNorm = 69.7423, GNorm = 0.6455, lr_0 = 1.5536e-04
Loss = 4.0621e-02, PNorm = 69.7440, GNorm = 0.4270, lr_0 = 1.5525e-04
Loss = 4.7282e-02, PNorm = 69.7461, GNorm = 0.4606, lr_0 = 1.5515e-04
Loss = 3.9459e-02, PNorm = 69.7473, GNorm = 0.5085, lr_0 = 1.5504e-04
Loss = 3.6463e-02, PNorm = 69.7498, GNorm = 0.5564, lr_0 = 1.5493e-04
Loss = 4.1609e-02, PNorm = 69.7538, GNorm = 0.5790, lr_0 = 1.5483e-04
Loss = 4.6886e-02, PNorm = 69.7566, GNorm = 0.6665, lr_0 = 1.5472e-04
Loss = 3.9216e-02, PNorm = 69.7580, GNorm = 0.3758, lr_0 = 1.5462e-04
Loss = 4.2205e-02, PNorm = 69.7601, GNorm = 0.4641, lr_0 = 1.5451e-04
Loss = 3.8422e-02, PNorm = 69.7620, GNorm = 0.5248, lr_0 = 1.5440e-04
Loss = 4.2595e-02, PNorm = 69.7642, GNorm = 0.5272, lr_0 = 1.5430e-04
Loss = 3.1746e-02, PNorm = 69.7657, GNorm = 0.4687, lr_0 = 1.5419e-04
Loss = 4.0493e-02, PNorm = 69.7668, GNorm = 0.4953, lr_0 = 1.5409e-04
Loss = 3.3080e-02, PNorm = 69.7677, GNorm = 0.6508, lr_0 = 1.5398e-04
Loss = 3.6282e-02, PNorm = 69.7694, GNorm = 0.5521, lr_0 = 1.5388e-04
Loss = 3.3014e-02, PNorm = 69.7729, GNorm = 0.4989, lr_0 = 1.5377e-04
Loss = 3.6177e-02, PNorm = 69.7759, GNorm = 0.5133, lr_0 = 1.5367e-04
Loss = 4.1112e-02, PNorm = 69.7773, GNorm = 0.7076, lr_0 = 1.5356e-04
Loss = 3.4819e-02, PNorm = 69.7776, GNorm = 0.5340, lr_0 = 1.5346e-04
Loss = 3.6919e-02, PNorm = 69.7787, GNorm = 0.5406, lr_0 = 1.5335e-04
Loss = 4.5767e-02, PNorm = 69.7813, GNorm = 0.6669, lr_0 = 1.5325e-04
Loss = 3.9194e-02, PNorm = 69.7847, GNorm = 0.6256, lr_0 = 1.5314e-04
Loss = 3.5753e-02, PNorm = 69.7871, GNorm = 0.5151, lr_0 = 1.5304e-04
Loss = 4.1990e-02, PNorm = 69.7897, GNorm = 0.4616, lr_0 = 1.5293e-04
Loss = 3.5144e-02, PNorm = 69.7926, GNorm = 0.3953, lr_0 = 1.5283e-04
Loss = 3.1617e-02, PNorm = 69.7942, GNorm = 0.3837, lr_0 = 1.5272e-04
Loss = 4.1946e-02, PNorm = 69.7964, GNorm = 0.3786, lr_0 = 1.5262e-04
Loss = 4.4557e-02, PNorm = 69.7986, GNorm = 0.8188, lr_0 = 1.5251e-04
Loss = 4.3911e-02, PNorm = 69.7998, GNorm = 0.6364, lr_0 = 1.5241e-04
Loss = 4.0186e-02, PNorm = 69.8031, GNorm = 0.5533, lr_0 = 1.5230e-04
Loss = 4.1794e-02, PNorm = 69.8061, GNorm = 0.5097, lr_0 = 1.5220e-04
Loss = 4.0424e-02, PNorm = 69.8097, GNorm = 0.4197, lr_0 = 1.5209e-04
Loss = 3.5124e-02, PNorm = 69.8110, GNorm = 0.4612, lr_0 = 1.5199e-04
Loss = 4.4240e-02, PNorm = 69.8130, GNorm = 0.5701, lr_0 = 1.5189e-04
Loss = 4.1993e-02, PNorm = 69.8156, GNorm = 0.6537, lr_0 = 1.5178e-04
Loss = 4.4014e-02, PNorm = 69.8176, GNorm = 0.5433, lr_0 = 1.5168e-04
Loss = 4.6212e-02, PNorm = 69.8197, GNorm = 0.5308, lr_0 = 1.5157e-04
Loss = 3.7455e-02, PNorm = 69.8212, GNorm = 0.4871, lr_0 = 1.5147e-04
Loss = 3.7574e-02, PNorm = 69.8224, GNorm = 0.4717, lr_0 = 1.5137e-04
Loss = 3.6762e-02, PNorm = 69.8245, GNorm = 0.4387, lr_0 = 1.5126e-04
Loss = 3.6822e-02, PNorm = 69.8256, GNorm = 0.7697, lr_0 = 1.5116e-04
Loss = 3.8284e-02, PNorm = 69.8278, GNorm = 0.5133, lr_0 = 1.5106e-04
Loss = 3.7932e-02, PNorm = 69.8299, GNorm = 0.4404, lr_0 = 1.5095e-04
Loss = 4.4797e-02, PNorm = 69.8326, GNorm = 0.3735, lr_0 = 1.5085e-04
Validation mae = 0.386327
Epoch 25
Loss = 3.3795e-02, PNorm = 69.8355, GNorm = 0.5716, lr_0 = 1.5075e-04
Loss = 3.5295e-02, PNorm = 69.8376, GNorm = 0.4357, lr_0 = 1.5064e-04
Loss = 3.4243e-02, PNorm = 69.8392, GNorm = 0.3819, lr_0 = 1.5054e-04
Loss = 3.0361e-02, PNorm = 69.8399, GNorm = 0.3813, lr_0 = 1.5044e-04
Loss = 3.5396e-02, PNorm = 69.8416, GNorm = 0.5651, lr_0 = 1.5033e-04
Loss = 3.0623e-02, PNorm = 69.8443, GNorm = 0.4612, lr_0 = 1.5023e-04
Loss = 2.9441e-02, PNorm = 69.8475, GNorm = 0.5576, lr_0 = 1.5013e-04
Loss = 3.3987e-02, PNorm = 69.8493, GNorm = 0.4573, lr_0 = 1.5002e-04
Loss = 3.4269e-02, PNorm = 69.8526, GNorm = 0.3730, lr_0 = 1.4992e-04
Loss = 3.8287e-02, PNorm = 69.8540, GNorm = 0.6933, lr_0 = 1.4982e-04
Loss = 3.3909e-02, PNorm = 69.8554, GNorm = 0.4674, lr_0 = 1.4972e-04
Loss = 3.7212e-02, PNorm = 69.8563, GNorm = 0.6333, lr_0 = 1.4961e-04
Loss = 3.6716e-02, PNorm = 69.8593, GNorm = 0.4202, lr_0 = 1.4951e-04
Loss = 3.5219e-02, PNorm = 69.8620, GNorm = 0.4890, lr_0 = 1.4941e-04
Loss = 3.0352e-02, PNorm = 69.8641, GNorm = 0.5075, lr_0 = 1.4931e-04
Loss = 3.6305e-02, PNorm = 69.8659, GNorm = 0.6221, lr_0 = 1.4920e-04
Loss = 3.5689e-02, PNorm = 69.8693, GNorm = 0.4976, lr_0 = 1.4910e-04
Loss = 3.5034e-02, PNorm = 69.8724, GNorm = 0.4199, lr_0 = 1.4900e-04
Loss = 3.2415e-02, PNorm = 69.8730, GNorm = 0.4517, lr_0 = 1.4890e-04
Loss = 3.7774e-02, PNorm = 69.8750, GNorm = 0.4723, lr_0 = 1.4880e-04
Loss = 3.8616e-02, PNorm = 69.8765, GNorm = 0.6250, lr_0 = 1.4869e-04
Loss = 3.5618e-02, PNorm = 69.8804, GNorm = 0.4056, lr_0 = 1.4859e-04
Loss = 3.1304e-02, PNorm = 69.8842, GNorm = 0.4540, lr_0 = 1.4849e-04
Loss = 3.3476e-02, PNorm = 69.8874, GNorm = 0.7236, lr_0 = 1.4839e-04
Loss = 3.9836e-02, PNorm = 69.8901, GNorm = 0.4155, lr_0 = 1.4829e-04
Loss = 3.6519e-02, PNorm = 69.8939, GNorm = 0.6547, lr_0 = 1.4818e-04
Loss = 3.5092e-02, PNorm = 69.8958, GNorm = 0.6752, lr_0 = 1.4808e-04
Loss = 3.3066e-02, PNorm = 69.8969, GNorm = 0.3920, lr_0 = 1.4798e-04
Loss = 3.8216e-02, PNorm = 69.8987, GNorm = 0.4265, lr_0 = 1.4788e-04
Loss = 3.2874e-02, PNorm = 69.9003, GNorm = 0.4021, lr_0 = 1.4778e-04
Loss = 3.3210e-02, PNorm = 69.9029, GNorm = 0.7638, lr_0 = 1.4768e-04
Loss = 3.6714e-02, PNorm = 69.9056, GNorm = 0.5144, lr_0 = 1.4758e-04
Loss = 3.7294e-02, PNorm = 69.9082, GNorm = 0.8090, lr_0 = 1.4748e-04
Loss = 3.6220e-02, PNorm = 69.9104, GNorm = 0.4843, lr_0 = 1.4737e-04
Loss = 3.5272e-02, PNorm = 69.9116, GNorm = 0.7442, lr_0 = 1.4727e-04
Loss = 3.3929e-02, PNorm = 69.9141, GNorm = 0.4706, lr_0 = 1.4717e-04
Loss = 3.3491e-02, PNorm = 69.9175, GNorm = 0.3728, lr_0 = 1.4707e-04
Loss = 3.9160e-02, PNorm = 69.9199, GNorm = 0.7106, lr_0 = 1.4697e-04
Loss = 3.6429e-02, PNorm = 69.9215, GNorm = 0.4414, lr_0 = 1.4687e-04
Loss = 4.2033e-02, PNorm = 69.9240, GNorm = 0.9168, lr_0 = 1.4677e-04
Loss = 3.2993e-02, PNorm = 69.9266, GNorm = 0.5679, lr_0 = 1.4667e-04
Loss = 3.6767e-02, PNorm = 69.9290, GNorm = 0.9932, lr_0 = 1.4657e-04
Loss = 3.4642e-02, PNorm = 69.9319, GNorm = 0.3923, lr_0 = 1.4647e-04
Loss = 3.7715e-02, PNorm = 69.9338, GNorm = 0.5688, lr_0 = 1.4637e-04
Loss = 3.3818e-02, PNorm = 69.9361, GNorm = 0.3750, lr_0 = 1.4627e-04
Loss = 3.2206e-02, PNorm = 69.9394, GNorm = 0.4570, lr_0 = 1.4617e-04
Loss = 3.5808e-02, PNorm = 69.9417, GNorm = 0.5977, lr_0 = 1.4607e-04
Loss = 3.6452e-02, PNorm = 69.9445, GNorm = 0.3558, lr_0 = 1.4597e-04
Loss = 4.1539e-02, PNorm = 69.9482, GNorm = 0.4664, lr_0 = 1.4587e-04
Loss = 4.5930e-02, PNorm = 69.9507, GNorm = 0.6103, lr_0 = 1.4577e-04
Loss = 3.3986e-02, PNorm = 69.9528, GNorm = 0.4358, lr_0 = 1.4567e-04
Loss = 3.6703e-02, PNorm = 69.9542, GNorm = 0.5433, lr_0 = 1.4557e-04
Loss = 3.5858e-02, PNorm = 69.9564, GNorm = 0.7615, lr_0 = 1.4547e-04
Loss = 3.4479e-02, PNorm = 69.9591, GNorm = 0.4827, lr_0 = 1.4537e-04
Loss = 3.8003e-02, PNorm = 69.9611, GNorm = 0.5780, lr_0 = 1.4527e-04
Loss = 3.4919e-02, PNorm = 69.9624, GNorm = 0.6127, lr_0 = 1.4517e-04
Loss = 3.8021e-02, PNorm = 69.9634, GNorm = 0.4545, lr_0 = 1.4507e-04
Loss = 3.5227e-02, PNorm = 69.9639, GNorm = 0.4843, lr_0 = 1.4497e-04
Loss = 3.4504e-02, PNorm = 69.9652, GNorm = 0.5007, lr_0 = 1.4487e-04
Loss = 3.8099e-02, PNorm = 69.9683, GNorm = 0.6562, lr_0 = 1.4477e-04
Loss = 3.5395e-02, PNorm = 69.9718, GNorm = 0.4591, lr_0 = 1.4467e-04
Loss = 3.5698e-02, PNorm = 69.9752, GNorm = 0.4048, lr_0 = 1.4457e-04
Loss = 3.2017e-02, PNorm = 69.9783, GNorm = 0.4487, lr_0 = 1.4447e-04
Loss = 3.8774e-02, PNorm = 69.9800, GNorm = 0.4769, lr_0 = 1.4438e-04
Loss = 3.2921e-02, PNorm = 69.9814, GNorm = 0.5299, lr_0 = 1.4428e-04
Loss = 4.3281e-02, PNorm = 69.9826, GNorm = 0.5629, lr_0 = 1.4418e-04
Loss = 3.3966e-02, PNorm = 69.9842, GNorm = 0.5231, lr_0 = 1.4408e-04
Loss = 3.2462e-02, PNorm = 69.9860, GNorm = 0.4614, lr_0 = 1.4398e-04
Loss = 3.1022e-02, PNorm = 69.9884, GNorm = 0.3742, lr_0 = 1.4388e-04
Loss = 3.7438e-02, PNorm = 69.9907, GNorm = 0.4652, lr_0 = 1.4378e-04
Loss = 3.2600e-02, PNorm = 69.9926, GNorm = 0.3531, lr_0 = 1.4368e-04
Loss = 3.9345e-02, PNorm = 69.9940, GNorm = 0.4158, lr_0 = 1.4359e-04
Loss = 3.5124e-02, PNorm = 69.9963, GNorm = 0.4529, lr_0 = 1.4349e-04
Loss = 3.6757e-02, PNorm = 69.9988, GNorm = 0.4675, lr_0 = 1.4339e-04
Loss = 3.3975e-02, PNorm = 70.0016, GNorm = 0.3868, lr_0 = 1.4329e-04
Loss = 3.8914e-02, PNorm = 70.0028, GNorm = 0.5773, lr_0 = 1.4319e-04
Loss = 3.8299e-02, PNorm = 70.0045, GNorm = 0.4809, lr_0 = 1.4310e-04
Loss = 3.3150e-02, PNorm = 70.0054, GNorm = 0.3696, lr_0 = 1.4300e-04
Loss = 3.5227e-02, PNorm = 70.0069, GNorm = 0.6021, lr_0 = 1.4290e-04
Loss = 4.2605e-02, PNorm = 70.0086, GNorm = 0.4595, lr_0 = 1.4280e-04
Loss = 3.6390e-02, PNorm = 70.0103, GNorm = 0.5032, lr_0 = 1.4270e-04
Loss = 3.3623e-02, PNorm = 70.0129, GNorm = 0.3319, lr_0 = 1.4261e-04
Loss = 3.5849e-02, PNorm = 70.0145, GNorm = 0.6626, lr_0 = 1.4251e-04
Loss = 3.9119e-02, PNorm = 70.0160, GNorm = 0.4695, lr_0 = 1.4241e-04
Loss = 3.6129e-02, PNorm = 70.0177, GNorm = 0.5744, lr_0 = 1.4231e-04
Loss = 3.6290e-02, PNorm = 70.0193, GNorm = 0.4062, lr_0 = 1.4222e-04
Loss = 3.4440e-02, PNorm = 70.0207, GNorm = 0.5347, lr_0 = 1.4212e-04
Loss = 3.6316e-02, PNorm = 70.0224, GNorm = 0.4470, lr_0 = 1.4202e-04
Loss = 3.4143e-02, PNorm = 70.0235, GNorm = 0.4504, lr_0 = 1.4192e-04
Loss = 3.2109e-02, PNorm = 70.0244, GNorm = 0.3581, lr_0 = 1.4183e-04
Loss = 4.1186e-02, PNorm = 70.0254, GNorm = 0.5694, lr_0 = 1.4173e-04
Loss = 3.7459e-02, PNorm = 70.0261, GNorm = 0.4920, lr_0 = 1.4163e-04
Loss = 3.5318e-02, PNorm = 70.0279, GNorm = 0.5871, lr_0 = 1.4153e-04
Loss = 3.4705e-02, PNorm = 70.0307, GNorm = 0.5211, lr_0 = 1.4144e-04
Loss = 3.8232e-02, PNorm = 70.0331, GNorm = 0.4856, lr_0 = 1.4134e-04
Loss = 3.6582e-02, PNorm = 70.0347, GNorm = 0.4828, lr_0 = 1.4124e-04
Loss = 3.8721e-02, PNorm = 70.0360, GNorm = 0.7893, lr_0 = 1.4115e-04
Loss = 3.8036e-02, PNorm = 70.0387, GNorm = 0.4371, lr_0 = 1.4105e-04
Loss = 4.0089e-02, PNorm = 70.0404, GNorm = 0.5024, lr_0 = 1.4095e-04
Loss = 3.3419e-02, PNorm = 70.0416, GNorm = 0.4807, lr_0 = 1.4086e-04
Loss = 3.8917e-02, PNorm = 70.0423, GNorm = 0.4766, lr_0 = 1.4076e-04
Loss = 3.1746e-02, PNorm = 70.0441, GNorm = 0.5996, lr_0 = 1.4066e-04
Loss = 3.8891e-02, PNorm = 70.0451, GNorm = 0.5078, lr_0 = 1.4057e-04
Loss = 3.5405e-02, PNorm = 70.0475, GNorm = 0.4409, lr_0 = 1.4047e-04
Loss = 3.6792e-02, PNorm = 70.0501, GNorm = 0.4391, lr_0 = 1.4038e-04
Loss = 4.1016e-02, PNorm = 70.0517, GNorm = 0.6520, lr_0 = 1.4028e-04
Loss = 3.7662e-02, PNorm = 70.0528, GNorm = 0.4277, lr_0 = 1.4018e-04
Loss = 3.4536e-02, PNorm = 70.0536, GNorm = 0.5085, lr_0 = 1.4009e-04
Loss = 3.9003e-02, PNorm = 70.0554, GNorm = 0.7856, lr_0 = 1.3999e-04
Loss = 3.4951e-02, PNorm = 70.0566, GNorm = 0.4663, lr_0 = 1.3990e-04
Loss = 3.5459e-02, PNorm = 70.0578, GNorm = 0.4357, lr_0 = 1.3980e-04
Loss = 3.5306e-02, PNorm = 70.0608, GNorm = 0.4146, lr_0 = 1.3970e-04
Loss = 3.5005e-02, PNorm = 70.0634, GNorm = 0.5112, lr_0 = 1.3961e-04
Loss = 3.7127e-02, PNorm = 70.0656, GNorm = 0.4474, lr_0 = 1.3951e-04
Loss = 4.3927e-02, PNorm = 70.0684, GNorm = 0.6978, lr_0 = 1.3942e-04
Loss = 3.5232e-02, PNorm = 70.0712, GNorm = 0.5208, lr_0 = 1.3932e-04
Loss = 3.4184e-02, PNorm = 70.0742, GNorm = 0.5776, lr_0 = 1.3923e-04
Loss = 3.8009e-02, PNorm = 70.0768, GNorm = 0.4637, lr_0 = 1.3913e-04
Loss = 4.3879e-02, PNorm = 70.0794, GNorm = 0.6802, lr_0 = 1.3904e-04
Loss = 3.8285e-02, PNorm = 70.0825, GNorm = 0.4859, lr_0 = 1.3894e-04
Validation mae = 0.388874
Epoch 26
Loss = 3.8265e-02, PNorm = 70.0853, GNorm = 0.3728, lr_0 = 1.3884e-04
Loss = 2.6497e-02, PNorm = 70.0872, GNorm = 0.4460, lr_0 = 1.3875e-04
Loss = 3.2042e-02, PNorm = 70.0882, GNorm = 0.7752, lr_0 = 1.3865e-04
Loss = 2.9038e-02, PNorm = 70.0901, GNorm = 0.4874, lr_0 = 1.3856e-04
Loss = 3.0675e-02, PNorm = 70.0923, GNorm = 0.4353, lr_0 = 1.3846e-04
Loss = 3.3950e-02, PNorm = 70.0945, GNorm = 0.5810, lr_0 = 1.3837e-04
Loss = 3.6033e-02, PNorm = 70.0969, GNorm = 0.4688, lr_0 = 1.3828e-04
Loss = 3.3489e-02, PNorm = 70.0994, GNorm = 0.5082, lr_0 = 1.3818e-04
Loss = 3.1237e-02, PNorm = 70.1013, GNorm = 0.3955, lr_0 = 1.3809e-04
Loss = 3.3166e-02, PNorm = 70.1034, GNorm = 0.3960, lr_0 = 1.3799e-04
Loss = 3.6618e-02, PNorm = 70.1062, GNorm = 0.4831, lr_0 = 1.3790e-04
Loss = 3.5191e-02, PNorm = 70.1079, GNorm = 0.4820, lr_0 = 1.3780e-04
Loss = 3.0893e-02, PNorm = 70.1094, GNorm = 0.4075, lr_0 = 1.3771e-04
Loss = 2.9845e-02, PNorm = 70.1106, GNorm = 0.4085, lr_0 = 1.3761e-04
Loss = 3.0682e-02, PNorm = 70.1112, GNorm = 0.3317, lr_0 = 1.3752e-04
Loss = 2.9146e-02, PNorm = 70.1123, GNorm = 0.4728, lr_0 = 1.3742e-04
Loss = 3.0880e-02, PNorm = 70.1145, GNorm = 0.3889, lr_0 = 1.3733e-04
Loss = 3.1172e-02, PNorm = 70.1159, GNorm = 0.9050, lr_0 = 1.3724e-04
Loss = 3.6853e-02, PNorm = 70.1183, GNorm = 0.4428, lr_0 = 1.3714e-04
Loss = 2.9291e-02, PNorm = 70.1209, GNorm = 0.7585, lr_0 = 1.3705e-04
Loss = 2.8591e-02, PNorm = 70.1227, GNorm = 0.4681, lr_0 = 1.3695e-04
Loss = 3.0441e-02, PNorm = 70.1243, GNorm = 0.6150, lr_0 = 1.3686e-04
Loss = 3.0525e-02, PNorm = 70.1257, GNorm = 0.4424, lr_0 = 1.3677e-04
Loss = 3.3050e-02, PNorm = 70.1277, GNorm = 0.5956, lr_0 = 1.3667e-04
Loss = 3.0691e-02, PNorm = 70.1298, GNorm = 0.6015, lr_0 = 1.3658e-04
Loss = 3.5137e-02, PNorm = 70.1323, GNorm = 0.4942, lr_0 = 1.3649e-04
Loss = 3.2130e-02, PNorm = 70.1345, GNorm = 0.4772, lr_0 = 1.3639e-04
Loss = 3.2096e-02, PNorm = 70.1369, GNorm = 0.7360, lr_0 = 1.3630e-04
Loss = 3.0353e-02, PNorm = 70.1387, GNorm = 0.4573, lr_0 = 1.3621e-04
Loss = 2.9318e-02, PNorm = 70.1409, GNorm = 0.2685, lr_0 = 1.3611e-04
Loss = 3.2234e-02, PNorm = 70.1424, GNorm = 0.5619, lr_0 = 1.3602e-04
Loss = 2.7730e-02, PNorm = 70.1433, GNorm = 0.4878, lr_0 = 1.3593e-04
Loss = 4.1600e-02, PNorm = 70.1440, GNorm = 0.5492, lr_0 = 1.3583e-04
Loss = 3.1236e-02, PNorm = 70.1466, GNorm = 0.4544, lr_0 = 1.3574e-04
Loss = 3.5732e-02, PNorm = 70.1486, GNorm = 0.6189, lr_0 = 1.3565e-04
Loss = 3.2238e-02, PNorm = 70.1513, GNorm = 0.4310, lr_0 = 1.3555e-04
Loss = 3.6582e-02, PNorm = 70.1553, GNorm = 0.4991, lr_0 = 1.3546e-04
Loss = 3.0846e-02, PNorm = 70.1579, GNorm = 0.5023, lr_0 = 1.3537e-04
Loss = 3.4977e-02, PNorm = 70.1599, GNorm = 0.6844, lr_0 = 1.3528e-04
Loss = 3.1283e-02, PNorm = 70.1609, GNorm = 0.6504, lr_0 = 1.3518e-04
Loss = 3.3225e-02, PNorm = 70.1630, GNorm = 0.3375, lr_0 = 1.3509e-04
Loss = 3.1284e-02, PNorm = 70.1652, GNorm = 0.4705, lr_0 = 1.3500e-04
Loss = 3.7721e-02, PNorm = 70.1671, GNorm = 0.6389, lr_0 = 1.3491e-04
Loss = 3.1865e-02, PNorm = 70.1691, GNorm = 0.4434, lr_0 = 1.3481e-04
Loss = 3.2982e-02, PNorm = 70.1709, GNorm = 0.5112, lr_0 = 1.3472e-04
Loss = 3.7390e-02, PNorm = 70.1725, GNorm = 0.4352, lr_0 = 1.3463e-04
Loss = 2.9264e-02, PNorm = 70.1740, GNorm = 0.3798, lr_0 = 1.3454e-04
Loss = 3.8656e-02, PNorm = 70.1751, GNorm = 0.4695, lr_0 = 1.3444e-04
Loss = 3.7320e-02, PNorm = 70.1765, GNorm = 0.4422, lr_0 = 1.3435e-04
Loss = 3.1111e-02, PNorm = 70.1778, GNorm = 0.3962, lr_0 = 1.3426e-04
Loss = 4.0355e-02, PNorm = 70.1801, GNorm = 0.5569, lr_0 = 1.3417e-04
Loss = 3.7154e-02, PNorm = 70.1821, GNorm = 0.5400, lr_0 = 1.3408e-04
Loss = 3.5823e-02, PNorm = 70.1851, GNorm = 0.5093, lr_0 = 1.3398e-04
Loss = 3.8048e-02, PNorm = 70.1876, GNorm = 0.3314, lr_0 = 1.3389e-04
Loss = 2.9331e-02, PNorm = 70.1890, GNorm = 0.3378, lr_0 = 1.3380e-04
Loss = 3.0616e-02, PNorm = 70.1905, GNorm = 0.4833, lr_0 = 1.3371e-04
Loss = 3.5228e-02, PNorm = 70.1921, GNorm = 0.4340, lr_0 = 1.3362e-04
Loss = 4.1479e-02, PNorm = 70.1941, GNorm = 0.5424, lr_0 = 1.3353e-04
Loss = 3.7506e-02, PNorm = 70.1962, GNorm = 0.4640, lr_0 = 1.3343e-04
Loss = 3.5616e-02, PNorm = 70.1983, GNorm = 0.4249, lr_0 = 1.3334e-04
Loss = 3.0592e-02, PNorm = 70.2006, GNorm = 0.4394, lr_0 = 1.3325e-04
Loss = 3.2811e-02, PNorm = 70.2024, GNorm = 0.6883, lr_0 = 1.3316e-04
Loss = 3.0203e-02, PNorm = 70.2041, GNorm = 0.5901, lr_0 = 1.3307e-04
Loss = 3.3164e-02, PNorm = 70.2066, GNorm = 0.4763, lr_0 = 1.3298e-04
Loss = 3.5264e-02, PNorm = 70.2097, GNorm = 0.5404, lr_0 = 1.3289e-04
Loss = 3.2475e-02, PNorm = 70.2116, GNorm = 0.3335, lr_0 = 1.3280e-04
Loss = 3.2025e-02, PNorm = 70.2126, GNorm = 0.5245, lr_0 = 1.3270e-04
Loss = 3.5191e-02, PNorm = 70.2140, GNorm = 0.4389, lr_0 = 1.3261e-04
Loss = 3.4775e-02, PNorm = 70.2160, GNorm = 0.3467, lr_0 = 1.3252e-04
Loss = 3.7904e-02, PNorm = 70.2178, GNorm = 0.5916, lr_0 = 1.3243e-04
Loss = 3.4835e-02, PNorm = 70.2192, GNorm = 0.5733, lr_0 = 1.3234e-04
Loss = 3.8494e-02, PNorm = 70.2220, GNorm = 0.3594, lr_0 = 1.3225e-04
Loss = 3.6595e-02, PNorm = 70.2241, GNorm = 0.4964, lr_0 = 1.3216e-04
Loss = 3.4921e-02, PNorm = 70.2260, GNorm = 0.5159, lr_0 = 1.3207e-04
Loss = 3.0565e-02, PNorm = 70.2278, GNorm = 0.5029, lr_0 = 1.3198e-04
Loss = 3.1398e-02, PNorm = 70.2289, GNorm = 0.4678, lr_0 = 1.3189e-04
Loss = 3.3742e-02, PNorm = 70.2304, GNorm = 0.5734, lr_0 = 1.3180e-04
Loss = 3.4798e-02, PNorm = 70.2325, GNorm = 0.5524, lr_0 = 1.3171e-04
Loss = 3.9976e-02, PNorm = 70.2346, GNorm = 0.6165, lr_0 = 1.3162e-04
Loss = 3.4216e-02, PNorm = 70.2369, GNorm = 0.4412, lr_0 = 1.3153e-04
Loss = 3.3800e-02, PNorm = 70.2388, GNorm = 0.4024, lr_0 = 1.3144e-04
Loss = 3.9002e-02, PNorm = 70.2413, GNorm = 0.5207, lr_0 = 1.3135e-04
Loss = 3.1332e-02, PNorm = 70.2427, GNorm = 0.3821, lr_0 = 1.3126e-04
Loss = 3.6690e-02, PNorm = 70.2447, GNorm = 0.5497, lr_0 = 1.3117e-04
Loss = 3.1183e-02, PNorm = 70.2461, GNorm = 0.3949, lr_0 = 1.3108e-04
Loss = 3.3033e-02, PNorm = 70.2479, GNorm = 0.3856, lr_0 = 1.3099e-04
Loss = 3.5109e-02, PNorm = 70.2500, GNorm = 0.4808, lr_0 = 1.3090e-04
Loss = 3.7494e-02, PNorm = 70.2516, GNorm = 0.3226, lr_0 = 1.3081e-04
Loss = 3.0641e-02, PNorm = 70.2528, GNorm = 0.4090, lr_0 = 1.3072e-04
Loss = 3.6187e-02, PNorm = 70.2556, GNorm = 0.5273, lr_0 = 1.3063e-04
Loss = 3.3097e-02, PNorm = 70.2579, GNorm = 0.4946, lr_0 = 1.3054e-04
Loss = 3.8302e-02, PNorm = 70.2594, GNorm = 0.5779, lr_0 = 1.3045e-04
Loss = 3.8091e-02, PNorm = 70.2606, GNorm = 0.4856, lr_0 = 1.3036e-04
Loss = 3.1528e-02, PNorm = 70.2619, GNorm = 0.4942, lr_0 = 1.3027e-04
Loss = 3.3953e-02, PNorm = 70.2640, GNorm = 0.4836, lr_0 = 1.3018e-04
Loss = 3.6303e-02, PNorm = 70.2662, GNorm = 0.6952, lr_0 = 1.3009e-04
Loss = 3.9356e-02, PNorm = 70.2683, GNorm = 0.4930, lr_0 = 1.3000e-04
Loss = 3.1586e-02, PNorm = 70.2701, GNorm = 0.6128, lr_0 = 1.2992e-04
Loss = 3.7190e-02, PNorm = 70.2708, GNorm = 0.4205, lr_0 = 1.2983e-04
Loss = 3.7718e-02, PNorm = 70.2727, GNorm = 0.4612, lr_0 = 1.2974e-04
Loss = 3.5711e-02, PNorm = 70.2740, GNorm = 0.3878, lr_0 = 1.2965e-04
Loss = 2.8822e-02, PNorm = 70.2755, GNorm = 0.4249, lr_0 = 1.2956e-04
Loss = 4.2424e-02, PNorm = 70.2767, GNorm = 0.5500, lr_0 = 1.2947e-04
Loss = 3.7538e-02, PNorm = 70.2775, GNorm = 0.4750, lr_0 = 1.2938e-04
Loss = 3.9838e-02, PNorm = 70.2801, GNorm = 0.4401, lr_0 = 1.2929e-04
Loss = 3.4449e-02, PNorm = 70.2814, GNorm = 0.3923, lr_0 = 1.2921e-04
Loss = 3.6402e-02, PNorm = 70.2831, GNorm = 0.3886, lr_0 = 1.2912e-04
Loss = 3.7414e-02, PNorm = 70.2844, GNorm = 0.3798, lr_0 = 1.2903e-04
Loss = 3.4553e-02, PNorm = 70.2860, GNorm = 0.5539, lr_0 = 1.2894e-04
Loss = 3.4751e-02, PNorm = 70.2877, GNorm = 0.3828, lr_0 = 1.2885e-04
Loss = 3.6222e-02, PNorm = 70.2902, GNorm = 0.6606, lr_0 = 1.2876e-04
Loss = 3.4826e-02, PNorm = 70.2919, GNorm = 0.8583, lr_0 = 1.2867e-04
Loss = 3.7723e-02, PNorm = 70.2927, GNorm = 0.7004, lr_0 = 1.2859e-04
Loss = 3.6508e-02, PNorm = 70.2941, GNorm = 0.4594, lr_0 = 1.2850e-04
Loss = 3.9138e-02, PNorm = 70.2963, GNorm = 0.5220, lr_0 = 1.2841e-04
Loss = 3.4353e-02, PNorm = 70.2977, GNorm = 0.5359, lr_0 = 1.2832e-04
Loss = 3.4189e-02, PNorm = 70.2989, GNorm = 0.4286, lr_0 = 1.2823e-04
Loss = 4.0172e-02, PNorm = 70.3000, GNorm = 0.5532, lr_0 = 1.2815e-04
Loss = 3.7996e-02, PNorm = 70.3015, GNorm = 0.4730, lr_0 = 1.2806e-04
Loss = 3.7143e-02, PNorm = 70.3028, GNorm = 0.4416, lr_0 = 1.2797e-04
Validation mae = 0.388132
Epoch 27
Loss = 3.2229e-02, PNorm = 70.3041, GNorm = 0.4642, lr_0 = 1.2788e-04
Loss = 3.0900e-02, PNorm = 70.3066, GNorm = 0.5140, lr_0 = 1.2780e-04
Loss = 2.8939e-02, PNorm = 70.3088, GNorm = 0.5812, lr_0 = 1.2771e-04
Loss = 2.9504e-02, PNorm = 70.3106, GNorm = 0.4364, lr_0 = 1.2762e-04
Loss = 2.9185e-02, PNorm = 70.3120, GNorm = 0.3967, lr_0 = 1.2753e-04
Loss = 3.1287e-02, PNorm = 70.3135, GNorm = 0.6132, lr_0 = 1.2745e-04
Loss = 2.9504e-02, PNorm = 70.3157, GNorm = 0.3720, lr_0 = 1.2736e-04
Loss = 4.1479e-02, PNorm = 70.3177, GNorm = 0.8351, lr_0 = 1.2727e-04
Loss = 2.9677e-02, PNorm = 70.3201, GNorm = 0.6528, lr_0 = 1.2718e-04
Loss = 3.6496e-02, PNorm = 70.3230, GNorm = 0.5248, lr_0 = 1.2710e-04
Loss = 2.8995e-02, PNorm = 70.3260, GNorm = 0.3515, lr_0 = 1.2701e-04
Loss = 2.8225e-02, PNorm = 70.3279, GNorm = 0.7138, lr_0 = 1.2692e-04
Loss = 3.2456e-02, PNorm = 70.3293, GNorm = 0.6307, lr_0 = 1.2684e-04
Loss = 3.4399e-02, PNorm = 70.3307, GNorm = 0.4177, lr_0 = 1.2675e-04
Loss = 2.8075e-02, PNorm = 70.3332, GNorm = 0.4748, lr_0 = 1.2666e-04
Loss = 3.3611e-02, PNorm = 70.3345, GNorm = 0.4696, lr_0 = 1.2658e-04
Loss = 3.1303e-02, PNorm = 70.3364, GNorm = 0.4171, lr_0 = 1.2649e-04
Loss = 3.2977e-02, PNorm = 70.3387, GNorm = 0.4909, lr_0 = 1.2640e-04
Loss = 3.1357e-02, PNorm = 70.3407, GNorm = 0.6857, lr_0 = 1.2632e-04
Loss = 3.1747e-02, PNorm = 70.3417, GNorm = 0.4963, lr_0 = 1.2623e-04
Loss = 3.3097e-02, PNorm = 70.3426, GNorm = 0.4026, lr_0 = 1.2614e-04
Loss = 3.1371e-02, PNorm = 70.3444, GNorm = 0.5350, lr_0 = 1.2606e-04
Loss = 3.3434e-02, PNorm = 70.3464, GNorm = 0.3850, lr_0 = 1.2597e-04
Loss = 3.1491e-02, PNorm = 70.3479, GNorm = 0.3075, lr_0 = 1.2588e-04
Loss = 3.3216e-02, PNorm = 70.3485, GNorm = 0.5254, lr_0 = 1.2580e-04
Loss = 3.5226e-02, PNorm = 70.3496, GNorm = 0.4123, lr_0 = 1.2571e-04
Loss = 3.1419e-02, PNorm = 70.3515, GNorm = 0.4911, lr_0 = 1.2563e-04
Loss = 2.8084e-02, PNorm = 70.3532, GNorm = 0.5324, lr_0 = 1.2554e-04
Loss = 2.9381e-02, PNorm = 70.3547, GNorm = 0.4021, lr_0 = 1.2545e-04
Loss = 3.0707e-02, PNorm = 70.3566, GNorm = 0.8693, lr_0 = 1.2537e-04
Loss = 3.0197e-02, PNorm = 70.3589, GNorm = 0.4182, lr_0 = 1.2528e-04
Loss = 2.5691e-02, PNorm = 70.3610, GNorm = 0.6788, lr_0 = 1.2520e-04
Loss = 3.8280e-02, PNorm = 70.3628, GNorm = 0.5443, lr_0 = 1.2511e-04
Loss = 2.8779e-02, PNorm = 70.3642, GNorm = 0.7088, lr_0 = 1.2502e-04
Loss = 3.3721e-02, PNorm = 70.3650, GNorm = 0.5146, lr_0 = 1.2494e-04
Loss = 2.8117e-02, PNorm = 70.3668, GNorm = 0.4991, lr_0 = 1.2485e-04
Loss = 3.7907e-02, PNorm = 70.3689, GNorm = 0.4255, lr_0 = 1.2477e-04
Loss = 3.2517e-02, PNorm = 70.3707, GNorm = 0.4700, lr_0 = 1.2468e-04
Loss = 3.0052e-02, PNorm = 70.3725, GNorm = 0.3760, lr_0 = 1.2460e-04
Loss = 2.7713e-02, PNorm = 70.3741, GNorm = 0.4385, lr_0 = 1.2451e-04
Loss = 2.8535e-02, PNorm = 70.3757, GNorm = 0.3297, lr_0 = 1.2443e-04
Loss = 3.0303e-02, PNorm = 70.3776, GNorm = 0.5744, lr_0 = 1.2434e-04
Loss = 2.9389e-02, PNorm = 70.3791, GNorm = 0.5256, lr_0 = 1.2426e-04
Loss = 3.2230e-02, PNorm = 70.3805, GNorm = 0.4287, lr_0 = 1.2417e-04
Loss = 3.1120e-02, PNorm = 70.3825, GNorm = 0.6385, lr_0 = 1.2409e-04
Loss = 2.9691e-02, PNorm = 70.3853, GNorm = 0.5131, lr_0 = 1.2400e-04
Loss = 3.3593e-02, PNorm = 70.3878, GNorm = 0.4472, lr_0 = 1.2392e-04
Loss = 3.4191e-02, PNorm = 70.3909, GNorm = 0.5248, lr_0 = 1.2383e-04
Loss = 2.6596e-02, PNorm = 70.3928, GNorm = 0.4844, lr_0 = 1.2375e-04
Loss = 2.9011e-02, PNorm = 70.3946, GNorm = 0.4790, lr_0 = 1.2366e-04
Loss = 2.9366e-02, PNorm = 70.3954, GNorm = 0.5715, lr_0 = 1.2358e-04
Loss = 3.2379e-02, PNorm = 70.3977, GNorm = 0.4100, lr_0 = 1.2349e-04
Loss = 3.6566e-02, PNorm = 70.4000, GNorm = 0.5350, lr_0 = 1.2341e-04
Loss = 3.4748e-02, PNorm = 70.4024, GNorm = 0.4327, lr_0 = 1.2332e-04
Loss = 3.5102e-02, PNorm = 70.4043, GNorm = 0.4915, lr_0 = 1.2324e-04
Loss = 3.2728e-02, PNorm = 70.4063, GNorm = 0.4350, lr_0 = 1.2315e-04
Loss = 3.7188e-02, PNorm = 70.4088, GNorm = 0.6588, lr_0 = 1.2307e-04
Loss = 3.7612e-02, PNorm = 70.4105, GNorm = 0.7756, lr_0 = 1.2298e-04
Loss = 3.3369e-02, PNorm = 70.4121, GNorm = 0.4265, lr_0 = 1.2290e-04
Loss = 3.3583e-02, PNorm = 70.4131, GNorm = 0.6929, lr_0 = 1.2282e-04
Loss = 3.4826e-02, PNorm = 70.4150, GNorm = 0.4115, lr_0 = 1.2273e-04
Loss = 3.0441e-02, PNorm = 70.4168, GNorm = 0.5531, lr_0 = 1.2265e-04
Loss = 2.9176e-02, PNorm = 70.4181, GNorm = 0.3665, lr_0 = 1.2256e-04
Loss = 2.9233e-02, PNorm = 70.4199, GNorm = 0.5985, lr_0 = 1.2248e-04
Loss = 3.4517e-02, PNorm = 70.4210, GNorm = 0.6785, lr_0 = 1.2240e-04
Loss = 3.2250e-02, PNorm = 70.4224, GNorm = 0.4313, lr_0 = 1.2231e-04
Loss = 3.5224e-02, PNorm = 70.4248, GNorm = 0.5636, lr_0 = 1.2223e-04
Loss = 3.2884e-02, PNorm = 70.4258, GNorm = 0.4405, lr_0 = 1.2214e-04
Loss = 3.4232e-02, PNorm = 70.4276, GNorm = 0.4379, lr_0 = 1.2206e-04
Loss = 2.9875e-02, PNorm = 70.4291, GNorm = 0.4484, lr_0 = 1.2198e-04
Loss = 3.4854e-02, PNorm = 70.4305, GNorm = 0.6119, lr_0 = 1.2189e-04
Loss = 3.2563e-02, PNorm = 70.4311, GNorm = 0.4245, lr_0 = 1.2181e-04
Loss = 3.4164e-02, PNorm = 70.4309, GNorm = 0.4969, lr_0 = 1.2173e-04
Loss = 3.1723e-02, PNorm = 70.4312, GNorm = 0.5056, lr_0 = 1.2164e-04
Loss = 3.4709e-02, PNorm = 70.4325, GNorm = 0.5736, lr_0 = 1.2156e-04
Loss = 3.4014e-02, PNorm = 70.4330, GNorm = 0.4620, lr_0 = 1.2148e-04
Loss = 4.1995e-02, PNorm = 70.4337, GNorm = 0.4377, lr_0 = 1.2139e-04
Loss = 3.1551e-02, PNorm = 70.4355, GNorm = 0.4988, lr_0 = 1.2131e-04
Loss = 3.8227e-02, PNorm = 70.4374, GNorm = 0.4372, lr_0 = 1.2123e-04
Loss = 3.0019e-02, PNorm = 70.4397, GNorm = 0.7147, lr_0 = 1.2114e-04
Loss = 3.1926e-02, PNorm = 70.4413, GNorm = 0.4294, lr_0 = 1.2106e-04
Loss = 3.1500e-02, PNorm = 70.4432, GNorm = 0.4611, lr_0 = 1.2098e-04
Loss = 2.8312e-02, PNorm = 70.4441, GNorm = 0.5312, lr_0 = 1.2090e-04
Loss = 3.5825e-02, PNorm = 70.4455, GNorm = 0.3549, lr_0 = 1.2081e-04
Loss = 3.2580e-02, PNorm = 70.4469, GNorm = 0.5902, lr_0 = 1.2073e-04
Loss = 3.6032e-02, PNorm = 70.4479, GNorm = 0.4989, lr_0 = 1.2065e-04
Loss = 3.2772e-02, PNorm = 70.4488, GNorm = 0.3638, lr_0 = 1.2056e-04
Loss = 3.6890e-02, PNorm = 70.4502, GNorm = 0.6522, lr_0 = 1.2048e-04
Loss = 3.2657e-02, PNorm = 70.4522, GNorm = 0.5569, lr_0 = 1.2040e-04
Loss = 2.9407e-02, PNorm = 70.4535, GNorm = 0.4903, lr_0 = 1.2032e-04
Loss = 3.5167e-02, PNorm = 70.4555, GNorm = 0.5150, lr_0 = 1.2023e-04
Loss = 3.4240e-02, PNorm = 70.4573, GNorm = 0.4351, lr_0 = 1.2015e-04
Loss = 3.0603e-02, PNorm = 70.4587, GNorm = 0.4742, lr_0 = 1.2007e-04
Loss = 3.4313e-02, PNorm = 70.4609, GNorm = 0.4612, lr_0 = 1.1999e-04
Loss = 2.9434e-02, PNorm = 70.4628, GNorm = 0.4960, lr_0 = 1.1991e-04
Loss = 3.1335e-02, PNorm = 70.4642, GNorm = 0.3758, lr_0 = 1.1982e-04
Loss = 3.4624e-02, PNorm = 70.4658, GNorm = 0.4736, lr_0 = 1.1974e-04
Loss = 3.0425e-02, PNorm = 70.4684, GNorm = 0.4137, lr_0 = 1.1966e-04
Loss = 3.5135e-02, PNorm = 70.4701, GNorm = 0.5981, lr_0 = 1.1958e-04
Loss = 3.3217e-02, PNorm = 70.4719, GNorm = 0.3946, lr_0 = 1.1950e-04
Loss = 3.6596e-02, PNorm = 70.4738, GNorm = 0.4734, lr_0 = 1.1941e-04
Loss = 4.1468e-02, PNorm = 70.4757, GNorm = 0.6453, lr_0 = 1.1933e-04
Loss = 3.1869e-02, PNorm = 70.4771, GNorm = 0.6770, lr_0 = 1.1925e-04
Loss = 3.2498e-02, PNorm = 70.4789, GNorm = 0.5446, lr_0 = 1.1917e-04
Loss = 3.0932e-02, PNorm = 70.4805, GNorm = 0.4421, lr_0 = 1.1909e-04
Loss = 3.1596e-02, PNorm = 70.4819, GNorm = 0.4474, lr_0 = 1.1901e-04
Loss = 3.5901e-02, PNorm = 70.4835, GNorm = 0.4350, lr_0 = 1.1892e-04
Loss = 3.3895e-02, PNorm = 70.4847, GNorm = 1.0147, lr_0 = 1.1884e-04
Loss = 4.2043e-02, PNorm = 70.4859, GNorm = 0.5375, lr_0 = 1.1876e-04
Loss = 3.5293e-02, PNorm = 70.4877, GNorm = 0.5444, lr_0 = 1.1868e-04
Loss = 3.2478e-02, PNorm = 70.4891, GNorm = 0.4866, lr_0 = 1.1860e-04
Loss = 3.5401e-02, PNorm = 70.4905, GNorm = 0.4626, lr_0 = 1.1852e-04
Loss = 3.7749e-02, PNorm = 70.4921, GNorm = 0.4615, lr_0 = 1.1844e-04
Loss = 3.5331e-02, PNorm = 70.4930, GNorm = 0.6832, lr_0 = 1.1835e-04
Loss = 3.7210e-02, PNorm = 70.4939, GNorm = 0.4864, lr_0 = 1.1827e-04
Loss = 3.2867e-02, PNorm = 70.4958, GNorm = 0.4002, lr_0 = 1.1819e-04
Loss = 3.4675e-02, PNorm = 70.4975, GNorm = 0.5953, lr_0 = 1.1811e-04
Loss = 3.5562e-02, PNorm = 70.4993, GNorm = 0.6359, lr_0 = 1.1803e-04
Loss = 3.0156e-02, PNorm = 70.5007, GNorm = 0.3331, lr_0 = 1.1795e-04
Loss = 3.1678e-02, PNorm = 70.5015, GNorm = 0.5613, lr_0 = 1.1787e-04
Validation mae = 0.393051
Epoch 28
Loss = 3.3618e-02, PNorm = 70.5031, GNorm = 0.5886, lr_0 = 1.1779e-04
Loss = 3.4009e-02, PNorm = 70.5049, GNorm = 0.5294, lr_0 = 1.1771e-04
Loss = 2.5103e-02, PNorm = 70.5064, GNorm = 0.5388, lr_0 = 1.1763e-04
Loss = 2.8872e-02, PNorm = 70.5088, GNorm = 0.4943, lr_0 = 1.1755e-04
Loss = 2.7147e-02, PNorm = 70.5112, GNorm = 0.5754, lr_0 = 1.1747e-04
Loss = 3.0096e-02, PNorm = 70.5125, GNorm = 0.7035, lr_0 = 1.1739e-04
Loss = 2.9252e-02, PNorm = 70.5140, GNorm = 0.4402, lr_0 = 1.1730e-04
Loss = 3.0395e-02, PNorm = 70.5155, GNorm = 0.5578, lr_0 = 1.1722e-04
Loss = 3.2498e-02, PNorm = 70.5182, GNorm = 0.4693, lr_0 = 1.1714e-04
Loss = 2.8665e-02, PNorm = 70.5215, GNorm = 0.3122, lr_0 = 1.1706e-04
Loss = 2.7998e-02, PNorm = 70.5233, GNorm = 0.4198, lr_0 = 1.1698e-04
Loss = 2.7495e-02, PNorm = 70.5245, GNorm = 0.4505, lr_0 = 1.1690e-04
Loss = 2.6771e-02, PNorm = 70.5255, GNorm = 0.4692, lr_0 = 1.1682e-04
Loss = 2.9859e-02, PNorm = 70.5270, GNorm = 0.4237, lr_0 = 1.1674e-04
Loss = 2.5253e-02, PNorm = 70.5291, GNorm = 0.4070, lr_0 = 1.1666e-04
Loss = 3.0358e-02, PNorm = 70.5313, GNorm = 0.6444, lr_0 = 1.1658e-04
Loss = 3.7867e-02, PNorm = 70.5341, GNorm = 0.4135, lr_0 = 1.1650e-04
Loss = 3.1943e-02, PNorm = 70.5363, GNorm = 0.4298, lr_0 = 1.1642e-04
Loss = 3.2073e-02, PNorm = 70.5366, GNorm = 0.4240, lr_0 = 1.1634e-04
Loss = 3.2372e-02, PNorm = 70.5381, GNorm = 0.4966, lr_0 = 1.1626e-04
Loss = 3.1931e-02, PNorm = 70.5399, GNorm = 0.6420, lr_0 = 1.1618e-04
Loss = 3.2412e-02, PNorm = 70.5416, GNorm = 0.4790, lr_0 = 1.1611e-04
Loss = 3.0009e-02, PNorm = 70.5445, GNorm = 0.7218, lr_0 = 1.1603e-04
Loss = 3.8242e-02, PNorm = 70.5468, GNorm = 0.7065, lr_0 = 1.1595e-04
Loss = 3.1568e-02, PNorm = 70.5491, GNorm = 0.5928, lr_0 = 1.1587e-04
Loss = 2.7302e-02, PNorm = 70.5512, GNorm = 0.3574, lr_0 = 1.1579e-04
Loss = 2.8814e-02, PNorm = 70.5529, GNorm = 0.4328, lr_0 = 1.1571e-04
Loss = 2.8624e-02, PNorm = 70.5542, GNorm = 0.3470, lr_0 = 1.1563e-04
Loss = 3.5031e-02, PNorm = 70.5550, GNorm = 0.4853, lr_0 = 1.1555e-04
Loss = 2.8601e-02, PNorm = 70.5555, GNorm = 0.4348, lr_0 = 1.1547e-04
Loss = 3.0430e-02, PNorm = 70.5560, GNorm = 0.5752, lr_0 = 1.1539e-04
Loss = 3.6120e-02, PNorm = 70.5558, GNorm = 0.5716, lr_0 = 1.1531e-04
Loss = 3.1093e-02, PNorm = 70.5571, GNorm = 0.4033, lr_0 = 1.1523e-04
Loss = 2.7370e-02, PNorm = 70.5583, GNorm = 0.5274, lr_0 = 1.1515e-04
Loss = 2.6690e-02, PNorm = 70.5592, GNorm = 0.4753, lr_0 = 1.1508e-04
Loss = 2.8617e-02, PNorm = 70.5607, GNorm = 0.6873, lr_0 = 1.1500e-04
Loss = 2.5475e-02, PNorm = 70.5621, GNorm = 0.3557, lr_0 = 1.1492e-04
Loss = 3.0580e-02, PNorm = 70.5632, GNorm = 0.4976, lr_0 = 1.1484e-04
Loss = 3.6977e-02, PNorm = 70.5642, GNorm = 0.5429, lr_0 = 1.1476e-04
Loss = 2.8617e-02, PNorm = 70.5652, GNorm = 0.3942, lr_0 = 1.1468e-04
Loss = 3.4599e-02, PNorm = 70.5670, GNorm = 0.2636, lr_0 = 1.1460e-04
Loss = 3.3088e-02, PNorm = 70.5701, GNorm = 0.5259, lr_0 = 1.1452e-04
Loss = 2.9155e-02, PNorm = 70.5719, GNorm = 0.3811, lr_0 = 1.1445e-04
Loss = 3.2038e-02, PNorm = 70.5734, GNorm = 0.5216, lr_0 = 1.1437e-04
Loss = 2.8966e-02, PNorm = 70.5749, GNorm = 0.3873, lr_0 = 1.1429e-04
Loss = 2.9584e-02, PNorm = 70.5764, GNorm = 0.4919, lr_0 = 1.1421e-04
Loss = 2.5151e-02, PNorm = 70.5780, GNorm = 0.4818, lr_0 = 1.1413e-04
Loss = 3.0852e-02, PNorm = 70.5797, GNorm = 0.4452, lr_0 = 1.1405e-04
Loss = 3.3922e-02, PNorm = 70.5809, GNorm = 0.5073, lr_0 = 1.1398e-04
Loss = 3.0457e-02, PNorm = 70.5823, GNorm = 0.4157, lr_0 = 1.1390e-04
Loss = 3.1251e-02, PNorm = 70.5842, GNorm = 0.4044, lr_0 = 1.1382e-04
Loss = 2.9253e-02, PNorm = 70.5864, GNorm = 0.5649, lr_0 = 1.1374e-04
Loss = 3.0237e-02, PNorm = 70.5881, GNorm = 0.4141, lr_0 = 1.1366e-04
Loss = 3.3117e-02, PNorm = 70.5894, GNorm = 0.4526, lr_0 = 1.1359e-04
Loss = 3.1492e-02, PNorm = 70.5910, GNorm = 0.5356, lr_0 = 1.1351e-04
Loss = 3.0580e-02, PNorm = 70.5924, GNorm = 0.4259, lr_0 = 1.1343e-04
Loss = 3.3494e-02, PNorm = 70.5932, GNorm = 0.5026, lr_0 = 1.1335e-04
Loss = 3.1492e-02, PNorm = 70.5946, GNorm = 0.4639, lr_0 = 1.1328e-04
Loss = 2.6827e-02, PNorm = 70.5955, GNorm = 0.4058, lr_0 = 1.1320e-04
Loss = 3.2654e-02, PNorm = 70.5968, GNorm = 0.6472, lr_0 = 1.1312e-04
Loss = 2.8987e-02, PNorm = 70.5977, GNorm = 0.5787, lr_0 = 1.1304e-04
Loss = 3.8959e-02, PNorm = 70.5986, GNorm = 0.5007, lr_0 = 1.1297e-04
Loss = 3.0307e-02, PNorm = 70.6003, GNorm = 0.5762, lr_0 = 1.1289e-04
Loss = 3.3797e-02, PNorm = 70.6017, GNorm = 0.5585, lr_0 = 1.1281e-04
Loss = 3.1186e-02, PNorm = 70.6028, GNorm = 0.5646, lr_0 = 1.1273e-04
Loss = 2.9434e-02, PNorm = 70.6046, GNorm = 0.3715, lr_0 = 1.1266e-04
Loss = 3.1654e-02, PNorm = 70.6061, GNorm = 0.5819, lr_0 = 1.1258e-04
Loss = 3.1267e-02, PNorm = 70.6073, GNorm = 0.3860, lr_0 = 1.1250e-04
Loss = 3.4480e-02, PNorm = 70.6084, GNorm = 0.6742, lr_0 = 1.1243e-04
Loss = 2.8270e-02, PNorm = 70.6105, GNorm = 0.4054, lr_0 = 1.1235e-04
Loss = 2.6587e-02, PNorm = 70.6127, GNorm = 0.3588, lr_0 = 1.1227e-04
Loss = 3.7429e-02, PNorm = 70.6149, GNorm = 0.4932, lr_0 = 1.1219e-04
Loss = 2.8399e-02, PNorm = 70.6168, GNorm = 0.5206, lr_0 = 1.1212e-04
Loss = 2.9284e-02, PNorm = 70.6175, GNorm = 0.4879, lr_0 = 1.1204e-04
Loss = 3.5551e-02, PNorm = 70.6188, GNorm = 0.5527, lr_0 = 1.1196e-04
Loss = 3.4280e-02, PNorm = 70.6209, GNorm = 0.5299, lr_0 = 1.1189e-04
Loss = 3.2914e-02, PNorm = 70.6229, GNorm = 0.4164, lr_0 = 1.1181e-04
Loss = 3.0961e-02, PNorm = 70.6242, GNorm = 0.4009, lr_0 = 1.1173e-04
Loss = 3.3675e-02, PNorm = 70.6255, GNorm = 0.4404, lr_0 = 1.1166e-04
Loss = 3.4975e-02, PNorm = 70.6263, GNorm = 0.5669, lr_0 = 1.1158e-04
Loss = 2.9315e-02, PNorm = 70.6274, GNorm = 0.4247, lr_0 = 1.1150e-04
Loss = 3.1542e-02, PNorm = 70.6284, GNorm = 0.3975, lr_0 = 1.1143e-04
Loss = 2.9246e-02, PNorm = 70.6296, GNorm = 0.4051, lr_0 = 1.1135e-04
Loss = 3.2517e-02, PNorm = 70.6312, GNorm = 0.6696, lr_0 = 1.1128e-04
Loss = 2.9226e-02, PNorm = 70.6329, GNorm = 0.3750, lr_0 = 1.1120e-04
Loss = 2.7610e-02, PNorm = 70.6334, GNorm = 0.3965, lr_0 = 1.1112e-04
Loss = 3.0033e-02, PNorm = 70.6343, GNorm = 0.5126, lr_0 = 1.1105e-04
Loss = 2.9133e-02, PNorm = 70.6356, GNorm = 0.5274, lr_0 = 1.1097e-04
Loss = 3.2124e-02, PNorm = 70.6373, GNorm = 0.4301, lr_0 = 1.1089e-04
Loss = 3.3845e-02, PNorm = 70.6391, GNorm = 0.4993, lr_0 = 1.1082e-04
Loss = 3.9692e-02, PNorm = 70.6404, GNorm = 0.5216, lr_0 = 1.1074e-04
Loss = 3.3707e-02, PNorm = 70.6414, GNorm = 0.5272, lr_0 = 1.1067e-04
Loss = 2.9934e-02, PNorm = 70.6420, GNorm = 0.3901, lr_0 = 1.1059e-04
Loss = 3.3909e-02, PNorm = 70.6437, GNorm = 0.4485, lr_0 = 1.1052e-04
Loss = 3.1870e-02, PNorm = 70.6452, GNorm = 0.5263, lr_0 = 1.1044e-04
Loss = 3.8845e-02, PNorm = 70.6462, GNorm = 0.7883, lr_0 = 1.1036e-04
Loss = 3.5721e-02, PNorm = 70.6476, GNorm = 0.5220, lr_0 = 1.1029e-04
Loss = 3.3911e-02, PNorm = 70.6492, GNorm = 0.5615, lr_0 = 1.1021e-04
Loss = 3.0605e-02, PNorm = 70.6508, GNorm = 0.5647, lr_0 = 1.1014e-04
Loss = 3.3145e-02, PNorm = 70.6518, GNorm = 0.5713, lr_0 = 1.1006e-04
Loss = 3.6214e-02, PNorm = 70.6531, GNorm = 0.4721, lr_0 = 1.0999e-04
Loss = 3.5142e-02, PNorm = 70.6546, GNorm = 0.5110, lr_0 = 1.0991e-04
Loss = 3.4070e-02, PNorm = 70.6564, GNorm = 0.6898, lr_0 = 1.0984e-04
Loss = 3.2802e-02, PNorm = 70.6583, GNorm = 0.5619, lr_0 = 1.0976e-04
Loss = 2.9538e-02, PNorm = 70.6598, GNorm = 0.4297, lr_0 = 1.0969e-04
Loss = 3.4271e-02, PNorm = 70.6612, GNorm = 0.4937, lr_0 = 1.0961e-04
Loss = 3.1237e-02, PNorm = 70.6633, GNorm = 0.4392, lr_0 = 1.0954e-04
Loss = 2.8295e-02, PNorm = 70.6652, GNorm = 0.4676, lr_0 = 1.0946e-04
Loss = 3.0004e-02, PNorm = 70.6670, GNorm = 0.5370, lr_0 = 1.0939e-04
Loss = 3.3986e-02, PNorm = 70.6685, GNorm = 0.4939, lr_0 = 1.0931e-04
Loss = 2.9691e-02, PNorm = 70.6696, GNorm = 0.4796, lr_0 = 1.0924e-04
Loss = 3.5414e-02, PNorm = 70.6706, GNorm = 0.6156, lr_0 = 1.0916e-04
Loss = 2.9593e-02, PNorm = 70.6716, GNorm = 0.4965, lr_0 = 1.0909e-04
Loss = 2.9299e-02, PNorm = 70.6730, GNorm = 0.5264, lr_0 = 1.0901e-04
Loss = 2.8613e-02, PNorm = 70.6740, GNorm = 0.4337, lr_0 = 1.0894e-04
Loss = 3.4407e-02, PNorm = 70.6751, GNorm = 0.5370, lr_0 = 1.0886e-04
Loss = 3.4938e-02, PNorm = 70.6775, GNorm = 0.3860, lr_0 = 1.0879e-04
Loss = 3.2048e-02, PNorm = 70.6795, GNorm = 0.4668, lr_0 = 1.0871e-04
Loss = 2.8665e-02, PNorm = 70.6804, GNorm = 0.5778, lr_0 = 1.0864e-04
Loss = 3.1359e-02, PNorm = 70.6814, GNorm = 0.4126, lr_0 = 1.0856e-04
Validation mae = 0.388580
Epoch 29
Loss = 2.6397e-02, PNorm = 70.6827, GNorm = 0.4142, lr_0 = 1.0849e-04
Loss = 2.6367e-02, PNorm = 70.6848, GNorm = 0.7164, lr_0 = 1.0841e-04
Loss = 2.6162e-02, PNorm = 70.6874, GNorm = 0.5425, lr_0 = 1.0834e-04
Loss = 2.7331e-02, PNorm = 70.6880, GNorm = 0.4022, lr_0 = 1.0827e-04
Loss = 3.1034e-02, PNorm = 70.6893, GNorm = 0.4118, lr_0 = 1.0819e-04
Loss = 2.6826e-02, PNorm = 70.6912, GNorm = 0.3545, lr_0 = 1.0812e-04
Loss = 2.9904e-02, PNorm = 70.6923, GNorm = 0.5201, lr_0 = 1.0804e-04
Loss = 2.9994e-02, PNorm = 70.6938, GNorm = 0.3092, lr_0 = 1.0797e-04
Loss = 2.7116e-02, PNorm = 70.6962, GNorm = 0.4893, lr_0 = 1.0790e-04
Loss = 2.5902e-02, PNorm = 70.6979, GNorm = 0.4837, lr_0 = 1.0782e-04
Loss = 2.5609e-02, PNorm = 70.6994, GNorm = 0.4866, lr_0 = 1.0775e-04
Loss = 2.7481e-02, PNorm = 70.7012, GNorm = 0.6293, lr_0 = 1.0767e-04
Loss = 2.7988e-02, PNorm = 70.7032, GNorm = 0.7655, lr_0 = 1.0760e-04
Loss = 2.7833e-02, PNorm = 70.7055, GNorm = 0.3658, lr_0 = 1.0753e-04
Loss = 3.3120e-02, PNorm = 70.7073, GNorm = 0.5572, lr_0 = 1.0745e-04
Loss = 2.6212e-02, PNorm = 70.7074, GNorm = 0.3553, lr_0 = 1.0738e-04
Loss = 2.9955e-02, PNorm = 70.7080, GNorm = 0.5784, lr_0 = 1.0731e-04
Loss = 3.1215e-02, PNorm = 70.7093, GNorm = 0.5424, lr_0 = 1.0723e-04
Loss = 2.8702e-02, PNorm = 70.7110, GNorm = 0.5013, lr_0 = 1.0716e-04
Loss = 3.0626e-02, PNorm = 70.7130, GNorm = 0.4905, lr_0 = 1.0709e-04
Loss = 3.0835e-02, PNorm = 70.7148, GNorm = 0.5188, lr_0 = 1.0701e-04
Loss = 3.4325e-02, PNorm = 70.7164, GNorm = 0.4722, lr_0 = 1.0694e-04
Loss = 2.7647e-02, PNorm = 70.7167, GNorm = 0.4105, lr_0 = 1.0687e-04
Loss = 2.9756e-02, PNorm = 70.7180, GNorm = 0.5038, lr_0 = 1.0679e-04
Loss = 3.0057e-02, PNorm = 70.7196, GNorm = 0.4137, lr_0 = 1.0672e-04
Loss = 3.4861e-02, PNorm = 70.7221, GNorm = 0.4953, lr_0 = 1.0665e-04
Loss = 2.4075e-02, PNorm = 70.7237, GNorm = 0.3331, lr_0 = 1.0657e-04
Loss = 2.4431e-02, PNorm = 70.7246, GNorm = 0.4830, lr_0 = 1.0650e-04
Loss = 2.7744e-02, PNorm = 70.7258, GNorm = 0.5089, lr_0 = 1.0643e-04
Loss = 2.8016e-02, PNorm = 70.7265, GNorm = 0.5469, lr_0 = 1.0635e-04
Loss = 2.9529e-02, PNorm = 70.7280, GNorm = 0.6414, lr_0 = 1.0628e-04
Loss = 2.7342e-02, PNorm = 70.7288, GNorm = 0.4700, lr_0 = 1.0621e-04
Loss = 2.5393e-02, PNorm = 70.7301, GNorm = 0.4482, lr_0 = 1.0614e-04
Loss = 3.2835e-02, PNorm = 70.7315, GNorm = 0.6365, lr_0 = 1.0606e-04
Loss = 3.0378e-02, PNorm = 70.7329, GNorm = 0.3845, lr_0 = 1.0599e-04
Loss = 2.9087e-02, PNorm = 70.7348, GNorm = 0.5209, lr_0 = 1.0592e-04
Loss = 2.5115e-02, PNorm = 70.7361, GNorm = 0.8516, lr_0 = 1.0585e-04
Loss = 2.9125e-02, PNorm = 70.7371, GNorm = 0.4097, lr_0 = 1.0577e-04
Loss = 3.0936e-02, PNorm = 70.7380, GNorm = 0.3483, lr_0 = 1.0570e-04
Loss = 2.8520e-02, PNorm = 70.7391, GNorm = 0.6022, lr_0 = 1.0563e-04
Loss = 3.6059e-02, PNorm = 70.7408, GNorm = 0.3865, lr_0 = 1.0556e-04
Loss = 2.5289e-02, PNorm = 70.7423, GNorm = 0.4852, lr_0 = 1.0548e-04
Loss = 3.5278e-02, PNorm = 70.7437, GNorm = 0.5010, lr_0 = 1.0541e-04
Loss = 2.5293e-02, PNorm = 70.7442, GNorm = 0.4726, lr_0 = 1.0534e-04
Loss = 3.1615e-02, PNorm = 70.7454, GNorm = 0.3605, lr_0 = 1.0527e-04
Loss = 2.8751e-02, PNorm = 70.7466, GNorm = 0.4676, lr_0 = 1.0519e-04
Loss = 3.2885e-02, PNorm = 70.7478, GNorm = 0.4728, lr_0 = 1.0512e-04
Loss = 2.9003e-02, PNorm = 70.7499, GNorm = 0.4178, lr_0 = 1.0505e-04
Loss = 2.7056e-02, PNorm = 70.7518, GNorm = 0.6774, lr_0 = 1.0498e-04
Loss = 2.9052e-02, PNorm = 70.7530, GNorm = 0.3453, lr_0 = 1.0491e-04
Loss = 2.5620e-02, PNorm = 70.7539, GNorm = 0.4178, lr_0 = 1.0483e-04
Loss = 3.4437e-02, PNorm = 70.7549, GNorm = 0.4057, lr_0 = 1.0476e-04
Loss = 2.5617e-02, PNorm = 70.7562, GNorm = 0.4588, lr_0 = 1.0469e-04
Loss = 3.5729e-02, PNorm = 70.7580, GNorm = 0.7106, lr_0 = 1.0462e-04
Loss = 3.3195e-02, PNorm = 70.7598, GNorm = 0.6418, lr_0 = 1.0455e-04
Loss = 3.1135e-02, PNorm = 70.7619, GNorm = 0.3968, lr_0 = 1.0448e-04
Loss = 2.7043e-02, PNorm = 70.7636, GNorm = 0.5242, lr_0 = 1.0440e-04
Loss = 3.2189e-02, PNorm = 70.7648, GNorm = 0.4235, lr_0 = 1.0433e-04
Loss = 3.0326e-02, PNorm = 70.7661, GNorm = 0.3519, lr_0 = 1.0426e-04
Loss = 2.4410e-02, PNorm = 70.7674, GNorm = 0.4279, lr_0 = 1.0419e-04
Loss = 2.4610e-02, PNorm = 70.7684, GNorm = 0.4115, lr_0 = 1.0412e-04
Loss = 2.8461e-02, PNorm = 70.7694, GNorm = 0.4631, lr_0 = 1.0405e-04
Loss = 3.0194e-02, PNorm = 70.7717, GNorm = 0.6538, lr_0 = 1.0398e-04
Loss = 2.8070e-02, PNorm = 70.7737, GNorm = 0.5137, lr_0 = 1.0391e-04
Loss = 3.2567e-02, PNorm = 70.7753, GNorm = 0.5077, lr_0 = 1.0383e-04
Loss = 3.0827e-02, PNorm = 70.7766, GNorm = 0.3819, lr_0 = 1.0376e-04
Loss = 3.1867e-02, PNorm = 70.7780, GNorm = 0.7022, lr_0 = 1.0369e-04
Loss = 3.0657e-02, PNorm = 70.7795, GNorm = 0.3814, lr_0 = 1.0362e-04
Loss = 3.0883e-02, PNorm = 70.7810, GNorm = 0.4262, lr_0 = 1.0355e-04
Loss = 2.7020e-02, PNorm = 70.7831, GNorm = 0.3717, lr_0 = 1.0348e-04
Loss = 3.2990e-02, PNorm = 70.7847, GNorm = 0.5067, lr_0 = 1.0341e-04
Loss = 2.7623e-02, PNorm = 70.7854, GNorm = 0.3585, lr_0 = 1.0334e-04
Loss = 3.6382e-02, PNorm = 70.7859, GNorm = 0.4059, lr_0 = 1.0327e-04
Loss = 3.1917e-02, PNorm = 70.7866, GNorm = 0.6258, lr_0 = 1.0320e-04
Loss = 3.5177e-02, PNorm = 70.7866, GNorm = 0.4217, lr_0 = 1.0312e-04
Loss = 3.4302e-02, PNorm = 70.7871, GNorm = 0.8934, lr_0 = 1.0305e-04
Loss = 2.7163e-02, PNorm = 70.7880, GNorm = 0.4987, lr_0 = 1.0298e-04
Loss = 2.8534e-02, PNorm = 70.7888, GNorm = 0.4821, lr_0 = 1.0291e-04
Loss = 2.8915e-02, PNorm = 70.7902, GNorm = 0.5365, lr_0 = 1.0284e-04
Loss = 3.0836e-02, PNorm = 70.7908, GNorm = 0.4443, lr_0 = 1.0277e-04
Loss = 3.2790e-02, PNorm = 70.7924, GNorm = 0.4571, lr_0 = 1.0270e-04
Loss = 3.1477e-02, PNorm = 70.7941, GNorm = 0.5447, lr_0 = 1.0263e-04
Loss = 3.0159e-02, PNorm = 70.7956, GNorm = 0.4518, lr_0 = 1.0256e-04
Loss = 2.7666e-02, PNorm = 70.7966, GNorm = 0.4236, lr_0 = 1.0249e-04
Loss = 2.9394e-02, PNorm = 70.7979, GNorm = 0.4576, lr_0 = 1.0242e-04
Loss = 2.7235e-02, PNorm = 70.7997, GNorm = 0.5553, lr_0 = 1.0235e-04
Loss = 2.7947e-02, PNorm = 70.8014, GNorm = 0.4207, lr_0 = 1.0228e-04
Loss = 2.6214e-02, PNorm = 70.8026, GNorm = 0.3809, lr_0 = 1.0221e-04
Loss = 2.6552e-02, PNorm = 70.8038, GNorm = 0.4612, lr_0 = 1.0214e-04
Loss = 3.2246e-02, PNorm = 70.8047, GNorm = 0.4497, lr_0 = 1.0207e-04
Loss = 3.2943e-02, PNorm = 70.8051, GNorm = 0.4554, lr_0 = 1.0200e-04
Loss = 3.1927e-02, PNorm = 70.8063, GNorm = 0.4103, lr_0 = 1.0193e-04
Loss = 2.5849e-02, PNorm = 70.8084, GNorm = 0.6503, lr_0 = 1.0186e-04
Loss = 3.1115e-02, PNorm = 70.8097, GNorm = 0.4583, lr_0 = 1.0179e-04
Loss = 3.1944e-02, PNorm = 70.8107, GNorm = 0.3537, lr_0 = 1.0172e-04
Loss = 3.0761e-02, PNorm = 70.8118, GNorm = 0.3834, lr_0 = 1.0165e-04
Loss = 3.6007e-02, PNorm = 70.8130, GNorm = 0.8878, lr_0 = 1.0158e-04
Loss = 3.0429e-02, PNorm = 70.8149, GNorm = 0.3069, lr_0 = 1.0151e-04
Loss = 3.2320e-02, PNorm = 70.8168, GNorm = 0.4876, lr_0 = 1.0144e-04
Loss = 3.0223e-02, PNorm = 70.8190, GNorm = 0.4537, lr_0 = 1.0137e-04
Loss = 3.3317e-02, PNorm = 70.8204, GNorm = 0.3781, lr_0 = 1.0130e-04
Loss = 3.5071e-02, PNorm = 70.8218, GNorm = 0.6023, lr_0 = 1.0123e-04
Loss = 3.6066e-02, PNorm = 70.8229, GNorm = 0.3798, lr_0 = 1.0116e-04
Loss = 2.8435e-02, PNorm = 70.8244, GNorm = 0.8004, lr_0 = 1.0110e-04
Loss = 3.0828e-02, PNorm = 70.8250, GNorm = 0.4058, lr_0 = 1.0103e-04
Loss = 3.0627e-02, PNorm = 70.8251, GNorm = 0.3721, lr_0 = 1.0096e-04
Loss = 3.2776e-02, PNorm = 70.8252, GNorm = 0.5644, lr_0 = 1.0089e-04
Loss = 3.3574e-02, PNorm = 70.8258, GNorm = 0.6630, lr_0 = 1.0082e-04
Loss = 3.0138e-02, PNorm = 70.8264, GNorm = 0.5315, lr_0 = 1.0075e-04
Loss = 2.9106e-02, PNorm = 70.8278, GNorm = 0.5545, lr_0 = 1.0068e-04
Loss = 2.8312e-02, PNorm = 70.8289, GNorm = 0.3549, lr_0 = 1.0061e-04
Loss = 3.1765e-02, PNorm = 70.8300, GNorm = 0.4188, lr_0 = 1.0054e-04
Loss = 3.0310e-02, PNorm = 70.8310, GNorm = 0.3950, lr_0 = 1.0047e-04
Loss = 3.5193e-02, PNorm = 70.8324, GNorm = 0.5134, lr_0 = 1.0041e-04
Loss = 2.9779e-02, PNorm = 70.8332, GNorm = 0.4767, lr_0 = 1.0034e-04
Loss = 3.0174e-02, PNorm = 70.8341, GNorm = 0.4420, lr_0 = 1.0027e-04
Loss = 2.9079e-02, PNorm = 70.8357, GNorm = 0.4504, lr_0 = 1.0020e-04
Loss = 3.0910e-02, PNorm = 70.8368, GNorm = 0.5582, lr_0 = 1.0013e-04
Loss = 3.2698e-02, PNorm = 70.8379, GNorm = 0.6645, lr_0 = 1.0006e-04
Loss = 2.4265e-02, PNorm = 70.8387, GNorm = 0.4660, lr_0 = 1.0000e-04
Validation mae = 0.389407
Model 0 best validation mae = 0.384346 on epoch 18
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.381892
Ensemble test mae = 0.381892
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 9.0625e-01, PNorm = 38.3710, GNorm = 4.1723, lr_0 = 1.0413e-04
Loss = 8.9565e-01, PNorm = 38.3744, GNorm = 4.7520, lr_0 = 1.0788e-04
Loss = 8.1105e-01, PNorm = 38.3775, GNorm = 11.0777, lr_0 = 1.1163e-04
Loss = 6.4538e-01, PNorm = 38.3809, GNorm = 7.6268, lr_0 = 1.1537e-04
Loss = 6.6210e-01, PNorm = 38.3846, GNorm = 5.5993, lr_0 = 1.1913e-04
Loss = 6.7224e-01, PNorm = 38.3890, GNorm = 1.5015, lr_0 = 1.2287e-04
Loss = 6.0044e-01, PNorm = 38.3935, GNorm = 2.7064, lr_0 = 1.2663e-04
Loss = 6.6317e-01, PNorm = 38.3990, GNorm = 8.5857, lr_0 = 1.3038e-04
Loss = 6.6737e-01, PNorm = 38.4032, GNorm = 5.9664, lr_0 = 1.3413e-04
Loss = 5.8152e-01, PNorm = 38.4084, GNorm = 5.5920, lr_0 = 1.3788e-04
Loss = 5.5713e-01, PNorm = 38.4150, GNorm = 1.2952, lr_0 = 1.4163e-04
Loss = 5.6268e-01, PNorm = 38.4205, GNorm = 3.9694, lr_0 = 1.4537e-04
Loss = 4.8901e-01, PNorm = 38.4244, GNorm = 2.9088, lr_0 = 1.4913e-04
Loss = 5.1947e-01, PNorm = 38.4301, GNorm = 9.8175, lr_0 = 1.5288e-04
Loss = 4.6203e-01, PNorm = 38.4380, GNorm = 7.4051, lr_0 = 1.5662e-04
Loss = 3.8042e-01, PNorm = 38.4457, GNorm = 1.1702, lr_0 = 1.6038e-04
Loss = 5.1225e-01, PNorm = 38.4500, GNorm = 15.6726, lr_0 = 1.6412e-04
Loss = 4.9572e-01, PNorm = 38.4516, GNorm = 6.1752, lr_0 = 1.6788e-04
Loss = 4.1511e-01, PNorm = 38.4559, GNorm = 2.8048, lr_0 = 1.7163e-04
Loss = 4.4165e-01, PNorm = 38.4618, GNorm = 10.7947, lr_0 = 1.7538e-04
Loss = 4.5571e-01, PNorm = 38.4678, GNorm = 12.0726, lr_0 = 1.7913e-04
Loss = 4.6347e-01, PNorm = 38.4735, GNorm = 13.0178, lr_0 = 1.8288e-04
Loss = 4.2376e-01, PNorm = 38.4787, GNorm = 14.3862, lr_0 = 1.8662e-04
Loss = 4.3075e-01, PNorm = 38.4831, GNorm = 6.4028, lr_0 = 1.9038e-04
Loss = 5.2094e-01, PNorm = 38.4885, GNorm = 28.4758, lr_0 = 1.9413e-04
Loss = 4.5662e-01, PNorm = 38.4946, GNorm = 2.2510, lr_0 = 1.9788e-04
Loss = 4.1377e-01, PNorm = 38.5010, GNorm = 10.3233, lr_0 = 2.0163e-04
Loss = 4.3923e-01, PNorm = 38.5065, GNorm = 18.4608, lr_0 = 2.0537e-04
Loss = 4.2957e-01, PNorm = 38.5141, GNorm = 5.9787, lr_0 = 2.0913e-04
Loss = 4.5154e-01, PNorm = 38.5202, GNorm = 4.1448, lr_0 = 2.1288e-04
Loss = 4.2516e-01, PNorm = 38.5246, GNorm = 9.1186, lr_0 = 2.1663e-04
Loss = 4.3481e-01, PNorm = 38.5320, GNorm = 6.9947, lr_0 = 2.2038e-04
Loss = 3.7514e-01, PNorm = 38.5396, GNorm = 15.1040, lr_0 = 2.2412e-04
Loss = 4.1021e-01, PNorm = 38.5444, GNorm = 4.3991, lr_0 = 2.2787e-04
Loss = 3.6798e-01, PNorm = 38.5498, GNorm = 15.8655, lr_0 = 2.3163e-04
Loss = 4.2585e-01, PNorm = 38.5575, GNorm = 23.4237, lr_0 = 2.3538e-04
Loss = 3.7017e-01, PNorm = 38.5637, GNorm = 4.0118, lr_0 = 2.3913e-04
Loss = 3.4991e-01, PNorm = 38.5707, GNorm = 7.0898, lr_0 = 2.4288e-04
Loss = 2.9939e-01, PNorm = 38.5752, GNorm = 3.2265, lr_0 = 2.4662e-04
Loss = 3.8256e-01, PNorm = 38.5796, GNorm = 13.4553, lr_0 = 2.5038e-04
Loss = 3.1653e-01, PNorm = 38.5854, GNorm = 3.7464, lr_0 = 2.5413e-04
Loss = 4.3097e-01, PNorm = 38.5880, GNorm = 25.0113, lr_0 = 2.5788e-04
Loss = 4.2543e-01, PNorm = 38.5900, GNorm = 7.3226, lr_0 = 2.6163e-04
Loss = 3.2650e-01, PNorm = 38.5970, GNorm = 1.5596, lr_0 = 2.6537e-04
Loss = 3.2181e-01, PNorm = 38.6033, GNorm = 5.1560, lr_0 = 2.6912e-04
Loss = 3.8927e-01, PNorm = 38.6089, GNorm = 6.5150, lr_0 = 2.7288e-04
Loss = 4.1348e-01, PNorm = 38.6165, GNorm = 6.4655, lr_0 = 2.7663e-04
Loss = 3.2917e-01, PNorm = 38.6256, GNorm = 3.6052, lr_0 = 2.8038e-04
Loss = 3.0293e-01, PNorm = 38.6321, GNorm = 5.3374, lr_0 = 2.8413e-04
Loss = 3.4126e-01, PNorm = 38.6380, GNorm = 7.3378, lr_0 = 2.8787e-04
Loss = 2.9457e-01, PNorm = 38.6472, GNorm = 4.0129, lr_0 = 2.9163e-04
Loss = 2.9386e-01, PNorm = 38.6531, GNorm = 4.8545, lr_0 = 2.9538e-04
Loss = 3.5840e-01, PNorm = 38.6543, GNorm = 19.4145, lr_0 = 2.9913e-04
Loss = 3.7345e-01, PNorm = 38.6568, GNorm = 15.9767, lr_0 = 3.0288e-04
Loss = 3.3375e-01, PNorm = 38.6636, GNorm = 13.6116, lr_0 = 3.0662e-04
Loss = 2.9588e-01, PNorm = 38.6713, GNorm = 7.0384, lr_0 = 3.1037e-04
Loss = 3.0270e-01, PNorm = 38.6769, GNorm = 7.5461, lr_0 = 3.1413e-04
Loss = 3.1385e-01, PNorm = 38.6832, GNorm = 12.6737, lr_0 = 3.1788e-04
Loss = 2.8731e-01, PNorm = 38.6915, GNorm = 8.9474, lr_0 = 3.2163e-04
Loss = 3.4871e-01, PNorm = 38.6961, GNorm = 6.2880, lr_0 = 3.2538e-04
Loss = 3.3834e-01, PNorm = 38.7045, GNorm = 3.1916, lr_0 = 3.2912e-04
Loss = 3.7266e-01, PNorm = 38.7116, GNorm = 5.3388, lr_0 = 3.3288e-04
Loss = 3.6696e-01, PNorm = 38.7215, GNorm = 15.2551, lr_0 = 3.3663e-04
Loss = 3.7156e-01, PNorm = 38.7285, GNorm = 19.1177, lr_0 = 3.4038e-04
Loss = 3.4056e-01, PNorm = 38.7371, GNorm = 14.3065, lr_0 = 3.4413e-04
Loss = 3.5603e-01, PNorm = 38.7455, GNorm = 4.6398, lr_0 = 3.4787e-04
Loss = 3.3029e-01, PNorm = 38.7519, GNorm = 6.9040, lr_0 = 3.5162e-04
Loss = 3.3172e-01, PNorm = 38.7596, GNorm = 5.9715, lr_0 = 3.5538e-04
Loss = 3.1829e-01, PNorm = 38.7699, GNorm = 1.6409, lr_0 = 3.5913e-04
Loss = 3.2296e-01, PNorm = 38.7756, GNorm = 18.0745, lr_0 = 3.6288e-04
Loss = 3.8394e-01, PNorm = 38.7816, GNorm = 8.1479, lr_0 = 3.6662e-04
Loss = 3.3738e-01, PNorm = 38.7916, GNorm = 5.1651, lr_0 = 3.7037e-04
Loss = 3.4934e-01, PNorm = 38.8020, GNorm = 2.9633, lr_0 = 3.7413e-04
Loss = 3.2275e-01, PNorm = 38.8128, GNorm = 11.7273, lr_0 = 3.7788e-04
Loss = 3.6524e-01, PNorm = 38.8215, GNorm = 17.3397, lr_0 = 3.8163e-04
Loss = 4.0145e-01, PNorm = 38.8281, GNorm = 15.6341, lr_0 = 3.8537e-04
Loss = 4.7796e-01, PNorm = 38.8382, GNorm = 2.0381, lr_0 = 3.8912e-04
Loss = 3.0539e-01, PNorm = 38.8521, GNorm = 1.6158, lr_0 = 3.9287e-04
Loss = 2.9976e-01, PNorm = 38.8652, GNorm = 0.8706, lr_0 = 3.9663e-04
Loss = 2.7236e-01, PNorm = 38.8751, GNorm = 3.9781, lr_0 = 4.0038e-04
Loss = 3.4031e-01, PNorm = 38.8839, GNorm = 10.0668, lr_0 = 4.0413e-04
Loss = 2.9810e-01, PNorm = 38.8913, GNorm = 11.3861, lr_0 = 4.0787e-04
Loss = 2.9281e-01, PNorm = 38.9015, GNorm = 3.3514, lr_0 = 4.1162e-04
Loss = 2.5734e-01, PNorm = 38.9141, GNorm = 11.4856, lr_0 = 4.1537e-04
Loss = 2.9950e-01, PNorm = 38.9250, GNorm = 0.9487, lr_0 = 4.1913e-04
Loss = 2.9584e-01, PNorm = 38.9319, GNorm = 4.0959, lr_0 = 4.2288e-04
Loss = 2.9992e-01, PNorm = 38.9398, GNorm = 9.5897, lr_0 = 4.2662e-04
Loss = 2.6277e-01, PNorm = 38.9501, GNorm = 9.2293, lr_0 = 4.3037e-04
Loss = 3.0696e-01, PNorm = 38.9604, GNorm = 12.8871, lr_0 = 4.3412e-04
Loss = 3.0125e-01, PNorm = 38.9695, GNorm = 10.2562, lr_0 = 4.3788e-04
Loss = 2.7648e-01, PNorm = 38.9793, GNorm = 5.0994, lr_0 = 4.4163e-04
Loss = 2.9028e-01, PNorm = 38.9936, GNorm = 4.5664, lr_0 = 4.4538e-04
Loss = 3.0088e-01, PNorm = 39.0052, GNorm = 3.8288, lr_0 = 4.4912e-04
Loss = 2.7991e-01, PNorm = 39.0134, GNorm = 3.4014, lr_0 = 4.5287e-04
Loss = 2.6634e-01, PNorm = 39.0231, GNorm = 1.6158, lr_0 = 4.5662e-04
Loss = 3.1382e-01, PNorm = 39.0326, GNorm = 3.8597, lr_0 = 4.6038e-04
Loss = 2.5840e-01, PNorm = 39.0389, GNorm = 1.2866, lr_0 = 4.6413e-04
Loss = 3.0597e-01, PNorm = 39.0456, GNorm = 7.2330, lr_0 = 4.6787e-04
Loss = 3.1248e-01, PNorm = 39.0578, GNorm = 0.8606, lr_0 = 4.7162e-04
Loss = 2.7401e-01, PNorm = 39.0735, GNorm = 4.2134, lr_0 = 4.7537e-04
Loss = 2.8705e-01, PNorm = 39.0898, GNorm = 8.4879, lr_0 = 4.7913e-04
Loss = 3.2617e-01, PNorm = 39.0970, GNorm = 11.0748, lr_0 = 4.8288e-04
Loss = 3.0386e-01, PNorm = 39.1054, GNorm = 1.0434, lr_0 = 4.8663e-04
Loss = 3.3831e-01, PNorm = 39.1146, GNorm = 10.6229, lr_0 = 4.9038e-04
Loss = 2.9135e-01, PNorm = 39.1263, GNorm = 11.1359, lr_0 = 4.9412e-04
Loss = 2.9030e-01, PNorm = 39.1375, GNorm = 7.5608, lr_0 = 4.9788e-04
Loss = 2.3601e-01, PNorm = 39.1449, GNorm = 4.4535, lr_0 = 5.0163e-04
Loss = 3.0175e-01, PNorm = 39.1552, GNorm = 10.8179, lr_0 = 5.0538e-04
Loss = 2.8698e-01, PNorm = 39.1683, GNorm = 9.8259, lr_0 = 5.0913e-04
Loss = 2.7243e-01, PNorm = 39.1757, GNorm = 5.2151, lr_0 = 5.1287e-04
Loss = 3.1112e-01, PNorm = 39.1836, GNorm = 2.4572, lr_0 = 5.1663e-04
Loss = 2.5226e-01, PNorm = 39.1989, GNorm = 7.3219, lr_0 = 5.2038e-04
Loss = 2.9175e-01, PNorm = 39.2105, GNorm = 2.0939, lr_0 = 5.2413e-04
Loss = 2.6966e-01, PNorm = 39.2218, GNorm = 9.7359, lr_0 = 5.2788e-04
Loss = 2.7312e-01, PNorm = 39.2354, GNorm = 2.9915, lr_0 = 5.3162e-04
Loss = 2.2545e-01, PNorm = 39.2499, GNorm = 1.8076, lr_0 = 5.3538e-04
Loss = 3.0233e-01, PNorm = 39.2630, GNorm = 10.0738, lr_0 = 5.3912e-04
Loss = 2.6684e-01, PNorm = 39.2729, GNorm = 4.9515, lr_0 = 5.4288e-04
Loss = 2.6664e-01, PNorm = 39.2858, GNorm = 7.5615, lr_0 = 5.4663e-04
Loss = 2.8042e-01, PNorm = 39.2996, GNorm = 6.4055, lr_0 = 5.5038e-04
Validation mae = 0.675373
Epoch 1
Loss = 3.2840e-01, PNorm = 39.3139, GNorm = 13.4642, lr_0 = 5.5413e-04
Loss = 3.5925e-01, PNorm = 39.3280, GNorm = 3.0185, lr_0 = 5.5787e-04
Loss = 2.8059e-01, PNorm = 39.3493, GNorm = 3.4879, lr_0 = 5.6163e-04
Loss = 2.7585e-01, PNorm = 39.3697, GNorm = 2.5018, lr_0 = 5.6538e-04
Loss = 2.9249e-01, PNorm = 39.3843, GNorm = 5.5183, lr_0 = 5.6913e-04
Loss = 2.7897e-01, PNorm = 39.3949, GNorm = 2.0865, lr_0 = 5.7288e-04
Loss = 2.6857e-01, PNorm = 39.4051, GNorm = 4.0557, lr_0 = 5.7662e-04
Loss = 2.8883e-01, PNorm = 39.4199, GNorm = 3.4087, lr_0 = 5.8038e-04
Loss = 3.0277e-01, PNorm = 39.4370, GNorm = 1.8312, lr_0 = 5.8413e-04
Loss = 2.5292e-01, PNorm = 39.4506, GNorm = 8.0385, lr_0 = 5.8788e-04
Loss = 2.2835e-01, PNorm = 39.4640, GNorm = 2.4405, lr_0 = 5.9163e-04
Loss = 2.6217e-01, PNorm = 39.4751, GNorm = 4.8095, lr_0 = 5.9538e-04
Loss = 2.7276e-01, PNorm = 39.4893, GNorm = 4.9981, lr_0 = 5.9913e-04
Loss = 2.6774e-01, PNorm = 39.5053, GNorm = 12.7011, lr_0 = 6.0288e-04
Loss = 3.4557e-01, PNorm = 39.5220, GNorm = 11.0782, lr_0 = 6.0663e-04
Loss = 2.8217e-01, PNorm = 39.5407, GNorm = 3.6961, lr_0 = 6.1038e-04
Loss = 2.6539e-01, PNorm = 39.5605, GNorm = 5.7629, lr_0 = 6.1413e-04
Loss = 2.3867e-01, PNorm = 39.5795, GNorm = 2.2379, lr_0 = 6.1788e-04
Loss = 2.5575e-01, PNorm = 39.5924, GNorm = 3.0526, lr_0 = 6.2163e-04
Loss = 2.6132e-01, PNorm = 39.6029, GNorm = 3.7692, lr_0 = 6.2538e-04
Loss = 2.6802e-01, PNorm = 39.6177, GNorm = 2.9621, lr_0 = 6.2913e-04
Loss = 2.3345e-01, PNorm = 39.6333, GNorm = 1.9270, lr_0 = 6.3288e-04
Loss = 2.3407e-01, PNorm = 39.6506, GNorm = 5.5419, lr_0 = 6.3663e-04
Loss = 2.7434e-01, PNorm = 39.6635, GNorm = 2.7486, lr_0 = 6.4038e-04
Loss = 2.9131e-01, PNorm = 39.6777, GNorm = 11.9486, lr_0 = 6.4413e-04
Loss = 3.0918e-01, PNorm = 39.6951, GNorm = 4.9906, lr_0 = 6.4788e-04
Loss = 2.6774e-01, PNorm = 39.7177, GNorm = 1.9239, lr_0 = 6.5163e-04
Loss = 2.6658e-01, PNorm = 39.7446, GNorm = 1.6073, lr_0 = 6.5538e-04
Loss = 2.8087e-01, PNorm = 39.7640, GNorm = 4.8985, lr_0 = 6.5913e-04
Loss = 2.9290e-01, PNorm = 39.7825, GNorm = 9.6243, lr_0 = 6.6288e-04
Loss = 2.6759e-01, PNorm = 39.7969, GNorm = 6.0037, lr_0 = 6.6663e-04
Loss = 3.0190e-01, PNorm = 39.8213, GNorm = 10.6512, lr_0 = 6.7038e-04
Loss = 2.4482e-01, PNorm = 39.8403, GNorm = 0.8950, lr_0 = 6.7413e-04
Loss = 2.3730e-01, PNorm = 39.8585, GNorm = 1.1758, lr_0 = 6.7788e-04
Loss = 2.2517e-01, PNorm = 39.8753, GNorm = 1.7060, lr_0 = 6.8163e-04
Loss = 2.8162e-01, PNorm = 39.8920, GNorm = 3.4188, lr_0 = 6.8538e-04
Loss = 2.3583e-01, PNorm = 39.9085, GNorm = 2.0240, lr_0 = 6.8913e-04
Loss = 2.6696e-01, PNorm = 39.9233, GNorm = 1.8140, lr_0 = 6.9288e-04
Loss = 2.5390e-01, PNorm = 39.9465, GNorm = 8.0951, lr_0 = 6.9663e-04
Loss = 2.8402e-01, PNorm = 39.9680, GNorm = 7.0198, lr_0 = 7.0038e-04
Loss = 3.0308e-01, PNorm = 39.9871, GNorm = 1.3547, lr_0 = 7.0413e-04
Loss = 2.7926e-01, PNorm = 40.0071, GNorm = 2.0086, lr_0 = 7.0788e-04
Loss = 2.2231e-01, PNorm = 40.0254, GNorm = 3.1317, lr_0 = 7.1163e-04
Loss = 3.2511e-01, PNorm = 40.0417, GNorm = 11.8499, lr_0 = 7.1538e-04
Loss = 2.4214e-01, PNorm = 40.0693, GNorm = 3.6391, lr_0 = 7.1913e-04
Loss = 2.7979e-01, PNorm = 40.0971, GNorm = 2.4577, lr_0 = 7.2288e-04
Loss = 2.5200e-01, PNorm = 40.1207, GNorm = 5.9694, lr_0 = 7.2663e-04
Loss = 2.2123e-01, PNorm = 40.1337, GNorm = 3.2903, lr_0 = 7.3038e-04
Loss = 3.7888e-01, PNorm = 40.1439, GNorm = 3.2307, lr_0 = 7.3413e-04
Loss = 3.2068e-01, PNorm = 40.1677, GNorm = 2.5831, lr_0 = 7.3788e-04
Loss = 2.5250e-01, PNorm = 40.1907, GNorm = 5.1437, lr_0 = 7.4163e-04
Loss = 2.4357e-01, PNorm = 40.2173, GNorm = 1.1298, lr_0 = 7.4538e-04
Loss = 2.2819e-01, PNorm = 40.2389, GNorm = 2.2645, lr_0 = 7.4913e-04
Loss = 2.3892e-01, PNorm = 40.2532, GNorm = 1.6498, lr_0 = 7.5288e-04
Loss = 2.6265e-01, PNorm = 40.2657, GNorm = 4.1273, lr_0 = 7.5663e-04
Loss = 2.8705e-01, PNorm = 40.2848, GNorm = 3.2260, lr_0 = 7.6038e-04
Loss = 2.5634e-01, PNorm = 40.3107, GNorm = 0.8670, lr_0 = 7.6413e-04
Loss = 2.3680e-01, PNorm = 40.3378, GNorm = 3.8767, lr_0 = 7.6788e-04
Loss = 2.4544e-01, PNorm = 40.3615, GNorm = 2.0715, lr_0 = 7.7163e-04
Loss = 2.3889e-01, PNorm = 40.3712, GNorm = 1.1519, lr_0 = 7.7538e-04
Loss = 2.3118e-01, PNorm = 40.3935, GNorm = 0.6886, lr_0 = 7.7913e-04
Loss = 2.2848e-01, PNorm = 40.4157, GNorm = 2.8669, lr_0 = 7.8288e-04
Loss = 2.1508e-01, PNorm = 40.4334, GNorm = 3.5620, lr_0 = 7.8663e-04
Loss = 2.0620e-01, PNorm = 40.4559, GNorm = 2.0046, lr_0 = 7.9038e-04
Loss = 2.6157e-01, PNorm = 40.4760, GNorm = 1.6682, lr_0 = 7.9413e-04
Loss = 3.3587e-01, PNorm = 40.4941, GNorm = 6.5611, lr_0 = 7.9788e-04
Loss = 2.8411e-01, PNorm = 40.5185, GNorm = 3.3182, lr_0 = 8.0163e-04
Loss = 2.2267e-01, PNorm = 40.5466, GNorm = 5.0076, lr_0 = 8.0538e-04
Loss = 2.4539e-01, PNorm = 40.5708, GNorm = 3.7427, lr_0 = 8.0913e-04
Loss = 2.1566e-01, PNorm = 40.5906, GNorm = 3.2789, lr_0 = 8.1288e-04
Loss = 2.4077e-01, PNorm = 40.6065, GNorm = 1.8923, lr_0 = 8.1663e-04
Loss = 2.1842e-01, PNorm = 40.6304, GNorm = 7.3339, lr_0 = 8.2038e-04
Loss = 2.2989e-01, PNorm = 40.6483, GNorm = 1.0074, lr_0 = 8.2413e-04
Loss = 2.6252e-01, PNorm = 40.6687, GNorm = 1.4879, lr_0 = 8.2788e-04
Loss = 3.2464e-01, PNorm = 40.6965, GNorm = 2.8832, lr_0 = 8.3163e-04
Loss = 2.4056e-01, PNorm = 40.7284, GNorm = 1.9346, lr_0 = 8.3538e-04
Loss = 1.9295e-01, PNorm = 40.7523, GNorm = 3.1362, lr_0 = 8.3913e-04
Loss = 2.1208e-01, PNorm = 40.7785, GNorm = 1.7518, lr_0 = 8.4288e-04
Loss = 1.9864e-01, PNorm = 40.7942, GNorm = 2.1595, lr_0 = 8.4663e-04
Loss = 2.4860e-01, PNorm = 40.8112, GNorm = 9.7982, lr_0 = 8.5038e-04
Loss = 2.8740e-01, PNorm = 40.8341, GNorm = 6.4172, lr_0 = 8.5413e-04
Loss = 2.5384e-01, PNorm = 40.8657, GNorm = 4.6502, lr_0 = 8.5788e-04
Loss = 2.2008e-01, PNorm = 40.8908, GNorm = 2.7762, lr_0 = 8.6163e-04
Loss = 2.4064e-01, PNorm = 40.9049, GNorm = 5.5123, lr_0 = 8.6538e-04
Loss = 2.5400e-01, PNorm = 40.9311, GNorm = 1.9686, lr_0 = 8.6913e-04
Loss = 2.2874e-01, PNorm = 40.9595, GNorm = 1.5797, lr_0 = 8.7288e-04
Loss = 2.2082e-01, PNorm = 40.9844, GNorm = 1.5765, lr_0 = 8.7663e-04
Loss = 2.5174e-01, PNorm = 41.0040, GNorm = 3.7482, lr_0 = 8.8038e-04
Loss = 2.2049e-01, PNorm = 41.0223, GNorm = 9.3644, lr_0 = 8.8413e-04
Loss = 2.6608e-01, PNorm = 41.0364, GNorm = 4.8270, lr_0 = 8.8788e-04
Loss = 2.3905e-01, PNorm = 41.0678, GNorm = 1.0624, lr_0 = 8.9163e-04
Loss = 2.4702e-01, PNorm = 41.0972, GNorm = 2.5453, lr_0 = 8.9538e-04
Loss = 2.3267e-01, PNorm = 41.1268, GNorm = 1.6822, lr_0 = 8.9913e-04
Loss = 2.5303e-01, PNorm = 41.1556, GNorm = 3.3815, lr_0 = 9.0288e-04
Loss = 2.4590e-01, PNorm = 41.1904, GNorm = 2.1382, lr_0 = 9.0663e-04
Loss = 2.2070e-01, PNorm = 41.2237, GNorm = 3.6711, lr_0 = 9.1038e-04
Loss = 2.4340e-01, PNorm = 41.2491, GNorm = 2.7484, lr_0 = 9.1413e-04
Loss = 2.4807e-01, PNorm = 41.2819, GNorm = 1.1236, lr_0 = 9.1788e-04
Loss = 2.0963e-01, PNorm = 41.3098, GNorm = 4.6892, lr_0 = 9.2163e-04
Loss = 2.4452e-01, PNorm = 41.3325, GNorm = 6.8613, lr_0 = 9.2538e-04
Loss = 2.4255e-01, PNorm = 41.3630, GNorm = 3.0445, lr_0 = 9.2913e-04
Loss = 2.4442e-01, PNorm = 41.3998, GNorm = 1.1913, lr_0 = 9.3288e-04
Loss = 2.2324e-01, PNorm = 41.4231, GNorm = 3.6551, lr_0 = 9.3663e-04
Loss = 2.3989e-01, PNorm = 41.4527, GNorm = 3.1657, lr_0 = 9.4038e-04
Loss = 2.2905e-01, PNorm = 41.4808, GNorm = 2.8105, lr_0 = 9.4413e-04
Loss = 2.2235e-01, PNorm = 41.5048, GNorm = 1.7541, lr_0 = 9.4788e-04
Loss = 2.0776e-01, PNorm = 41.5317, GNorm = 1.3823, lr_0 = 9.5163e-04
Loss = 2.0827e-01, PNorm = 41.5516, GNorm = 3.4277, lr_0 = 9.5538e-04
Loss = 2.5403e-01, PNorm = 41.5765, GNorm = 4.0219, lr_0 = 9.5913e-04
Loss = 2.4094e-01, PNorm = 41.6056, GNorm = 1.4726, lr_0 = 9.6288e-04
Loss = 2.4870e-01, PNorm = 41.6434, GNorm = 2.3241, lr_0 = 9.6663e-04
Loss = 2.0787e-01, PNorm = 41.6670, GNorm = 2.6805, lr_0 = 9.7038e-04
Loss = 2.1636e-01, PNorm = 41.6887, GNorm = 3.6914, lr_0 = 9.7413e-04
Loss = 2.3161e-01, PNorm = 41.7115, GNorm = 1.1557, lr_0 = 9.7788e-04
Loss = 2.0699e-01, PNorm = 41.7394, GNorm = 0.9021, lr_0 = 9.8163e-04
Loss = 2.4711e-01, PNorm = 41.7653, GNorm = 1.5618, lr_0 = 9.8537e-04
Loss = 2.3064e-01, PNorm = 41.7999, GNorm = 2.9942, lr_0 = 9.8912e-04
Loss = 2.0268e-01, PNorm = 41.8286, GNorm = 2.8786, lr_0 = 9.9288e-04
Loss = 2.0720e-01, PNorm = 41.8452, GNorm = 1.3031, lr_0 = 9.9663e-04
Loss = 1.9212e-01, PNorm = 41.8746, GNorm = 1.9584, lr_0 = 9.9993e-04
Validation mae = 0.520376
Epoch 2
Loss = 2.3295e-01, PNorm = 41.8958, GNorm = 3.4254, lr_0 = 9.9925e-04
Loss = 2.3938e-01, PNorm = 41.9218, GNorm = 6.8349, lr_0 = 9.9856e-04
Loss = 2.6037e-01, PNorm = 41.9536, GNorm = 4.0520, lr_0 = 9.9788e-04
Loss = 2.1853e-01, PNorm = 41.9836, GNorm = 0.8675, lr_0 = 9.9719e-04
Loss = 2.2315e-01, PNorm = 42.0208, GNorm = 3.0749, lr_0 = 9.9651e-04
Loss = 2.0529e-01, PNorm = 42.0581, GNorm = 2.4293, lr_0 = 9.9583e-04
Loss = 2.2093e-01, PNorm = 42.0987, GNorm = 2.5397, lr_0 = 9.9515e-04
Loss = 2.0686e-01, PNorm = 42.1240, GNorm = 5.3937, lr_0 = 9.9446e-04
Loss = 2.0214e-01, PNorm = 42.1354, GNorm = 1.8720, lr_0 = 9.9378e-04
Loss = 2.0215e-01, PNorm = 42.1600, GNorm = 2.4915, lr_0 = 9.9310e-04
Loss = 2.1959e-01, PNorm = 42.1955, GNorm = 1.4255, lr_0 = 9.9242e-04
Loss = 2.2809e-01, PNorm = 42.2117, GNorm = 2.7228, lr_0 = 9.9174e-04
Loss = 2.4241e-01, PNorm = 42.2303, GNorm = 4.7637, lr_0 = 9.9106e-04
Loss = 2.3630e-01, PNorm = 42.2653, GNorm = 5.3081, lr_0 = 9.9038e-04
Loss = 2.2672e-01, PNorm = 42.3086, GNorm = 4.1307, lr_0 = 9.8971e-04
Loss = 2.0968e-01, PNorm = 42.3484, GNorm = 5.4943, lr_0 = 9.8903e-04
Loss = 2.1945e-01, PNorm = 42.3795, GNorm = 1.2373, lr_0 = 9.8835e-04
Loss = 2.1493e-01, PNorm = 42.4066, GNorm = 2.8492, lr_0 = 9.8767e-04
Loss = 2.0280e-01, PNorm = 42.4357, GNorm = 1.4645, lr_0 = 9.8700e-04
Loss = 2.4944e-01, PNorm = 42.4661, GNorm = 2.8656, lr_0 = 9.8632e-04
Loss = 2.4273e-01, PNorm = 42.5023, GNorm = 1.3772, lr_0 = 9.8564e-04
Loss = 2.1691e-01, PNorm = 42.5363, GNorm = 5.7309, lr_0 = 9.8497e-04
Loss = 2.4041e-01, PNorm = 42.5718, GNorm = 3.9679, lr_0 = 9.8429e-04
Loss = 1.9060e-01, PNorm = 42.6055, GNorm = 2.8401, lr_0 = 9.8362e-04
Loss = 1.9241e-01, PNorm = 42.6298, GNorm = 0.6816, lr_0 = 9.8295e-04
Loss = 1.8534e-01, PNorm = 42.6562, GNorm = 4.2217, lr_0 = 9.8227e-04
Loss = 1.6568e-01, PNorm = 42.6761, GNorm = 1.1768, lr_0 = 9.8160e-04
Loss = 2.1982e-01, PNorm = 42.6966, GNorm = 2.5157, lr_0 = 9.8093e-04
Loss = 1.9645e-01, PNorm = 42.7216, GNorm = 2.5958, lr_0 = 9.8026e-04
Loss = 1.9563e-01, PNorm = 42.7413, GNorm = 1.0259, lr_0 = 9.7958e-04
Loss = 1.9071e-01, PNorm = 42.7572, GNorm = 1.3923, lr_0 = 9.7891e-04
Loss = 1.9322e-01, PNorm = 42.7774, GNorm = 1.0109, lr_0 = 9.7824e-04
Loss = 2.0973e-01, PNorm = 42.7977, GNorm = 2.7544, lr_0 = 9.7757e-04
Loss = 1.9152e-01, PNorm = 42.8189, GNorm = 1.2233, lr_0 = 9.7690e-04
Loss = 2.0401e-01, PNorm = 42.8356, GNorm = 1.6881, lr_0 = 9.7623e-04
Loss = 2.0133e-01, PNorm = 42.8521, GNorm = 2.3089, lr_0 = 9.7556e-04
Loss = 1.8622e-01, PNorm = 42.8771, GNorm = 1.1957, lr_0 = 9.7490e-04
Loss = 2.0293e-01, PNorm = 42.8992, GNorm = 2.7826, lr_0 = 9.7423e-04
Loss = 2.0496e-01, PNorm = 42.9230, GNorm = 1.9304, lr_0 = 9.7356e-04
Loss = 1.6848e-01, PNorm = 42.9437, GNorm = 0.7310, lr_0 = 9.7289e-04
Loss = 1.8008e-01, PNorm = 42.9653, GNorm = 3.6041, lr_0 = 9.7223e-04
Loss = 1.7266e-01, PNorm = 42.9842, GNorm = 3.2242, lr_0 = 9.7156e-04
Loss = 1.8027e-01, PNorm = 43.0124, GNorm = 1.2133, lr_0 = 9.7090e-04
Loss = 1.7587e-01, PNorm = 43.0476, GNorm = 1.0966, lr_0 = 9.7023e-04
Loss = 1.9141e-01, PNorm = 43.0742, GNorm = 1.2371, lr_0 = 9.6957e-04
Loss = 2.3854e-01, PNorm = 43.0975, GNorm = 2.4130, lr_0 = 9.6890e-04
Loss = 2.0894e-01, PNorm = 43.1197, GNorm = 1.4849, lr_0 = 9.6824e-04
Loss = 2.1462e-01, PNorm = 43.1433, GNorm = 4.4910, lr_0 = 9.6757e-04
Loss = 2.2029e-01, PNorm = 43.1696, GNorm = 2.8910, lr_0 = 9.6691e-04
Loss = 2.2090e-01, PNorm = 43.2074, GNorm = 3.5870, lr_0 = 9.6625e-04
Loss = 2.1716e-01, PNorm = 43.2387, GNorm = 0.9662, lr_0 = 9.6559e-04
Loss = 1.8575e-01, PNorm = 43.2705, GNorm = 2.2371, lr_0 = 9.6493e-04
Loss = 2.1344e-01, PNorm = 43.3000, GNorm = 1.6940, lr_0 = 9.6427e-04
Loss = 1.9733e-01, PNorm = 43.3222, GNorm = 1.4023, lr_0 = 9.6360e-04
Loss = 2.3265e-01, PNorm = 43.3498, GNorm = 1.3519, lr_0 = 9.6294e-04
Loss = 1.6406e-01, PNorm = 43.3823, GNorm = 1.0505, lr_0 = 9.6228e-04
Loss = 1.7940e-01, PNorm = 43.4046, GNorm = 1.3543, lr_0 = 9.6163e-04
Loss = 2.2053e-01, PNorm = 43.4255, GNorm = 1.4385, lr_0 = 9.6097e-04
Loss = 2.0254e-01, PNorm = 43.4492, GNorm = 4.6110, lr_0 = 9.6031e-04
Loss = 1.9773e-01, PNorm = 43.4673, GNorm = 4.1522, lr_0 = 9.5965e-04
Loss = 2.2080e-01, PNorm = 43.4950, GNorm = 4.6012, lr_0 = 9.5899e-04
Loss = 2.1686e-01, PNorm = 43.5251, GNorm = 0.8370, lr_0 = 9.5834e-04
Loss = 1.8636e-01, PNorm = 43.5489, GNorm = 1.3780, lr_0 = 9.5768e-04
Loss = 1.8642e-01, PNorm = 43.5701, GNorm = 1.6928, lr_0 = 9.5702e-04
Loss = 1.7107e-01, PNorm = 43.5820, GNorm = 1.6775, lr_0 = 9.5637e-04
Loss = 2.2009e-01, PNorm = 43.6032, GNorm = 5.0414, lr_0 = 9.5571e-04
Loss = 2.0387e-01, PNorm = 43.6386, GNorm = 1.1795, lr_0 = 9.5506e-04
Loss = 1.8361e-01, PNorm = 43.6746, GNorm = 2.2695, lr_0 = 9.5440e-04
Loss = 1.8364e-01, PNorm = 43.7038, GNorm = 2.0604, lr_0 = 9.5375e-04
Loss = 1.7881e-01, PNorm = 43.7227, GNorm = 3.9928, lr_0 = 9.5310e-04
Loss = 1.9271e-01, PNorm = 43.7478, GNorm = 2.7115, lr_0 = 9.5244e-04
Loss = 1.8861e-01, PNorm = 43.7687, GNorm = 1.0660, lr_0 = 9.5179e-04
Loss = 1.8112e-01, PNorm = 43.7823, GNorm = 2.6490, lr_0 = 9.5114e-04
Loss = 2.2280e-01, PNorm = 43.8006, GNorm = 3.1290, lr_0 = 9.5049e-04
Loss = 2.2692e-01, PNorm = 43.8369, GNorm = 0.7011, lr_0 = 9.4984e-04
Loss = 1.9723e-01, PNorm = 43.8672, GNorm = 0.8306, lr_0 = 9.4919e-04
Loss = 1.8904e-01, PNorm = 43.8927, GNorm = 2.0369, lr_0 = 9.4854e-04
Loss = 1.6273e-01, PNorm = 43.9222, GNorm = 2.7101, lr_0 = 9.4789e-04
Loss = 1.8771e-01, PNorm = 43.9420, GNorm = 5.6450, lr_0 = 9.4724e-04
Loss = 2.0003e-01, PNorm = 43.9629, GNorm = 0.8540, lr_0 = 9.4659e-04
Loss = 1.9169e-01, PNorm = 43.9929, GNorm = 1.3392, lr_0 = 9.4594e-04
Loss = 1.8631e-01, PNorm = 44.0090, GNorm = 2.3914, lr_0 = 9.4529e-04
Loss = 1.8384e-01, PNorm = 44.0274, GNorm = 1.0716, lr_0 = 9.4464e-04
Loss = 1.6463e-01, PNorm = 44.0454, GNorm = 0.6872, lr_0 = 9.4400e-04
Loss = 1.6978e-01, PNorm = 44.0600, GNorm = 1.3641, lr_0 = 9.4335e-04
Loss = 1.6458e-01, PNorm = 44.0785, GNorm = 1.9316, lr_0 = 9.4270e-04
Loss = 1.8542e-01, PNorm = 44.0955, GNorm = 0.7633, lr_0 = 9.4206e-04
Loss = 1.7250e-01, PNorm = 44.1142, GNorm = 1.0418, lr_0 = 9.4141e-04
Loss = 1.9111e-01, PNorm = 44.1404, GNorm = 1.5272, lr_0 = 9.4077e-04
Loss = 1.5528e-01, PNorm = 44.1580, GNorm = 2.7830, lr_0 = 9.4012e-04
Loss = 1.7403e-01, PNorm = 44.1830, GNorm = 1.9125, lr_0 = 9.3948e-04
Loss = 1.7594e-01, PNorm = 44.2102, GNorm = 1.2463, lr_0 = 9.3884e-04
Loss = 1.9479e-01, PNorm = 44.2260, GNorm = 1.0146, lr_0 = 9.3819e-04
Loss = 1.6568e-01, PNorm = 44.2463, GNorm = 1.8948, lr_0 = 9.3755e-04
Loss = 1.7355e-01, PNorm = 44.2689, GNorm = 1.1363, lr_0 = 9.3691e-04
Loss = 1.8934e-01, PNorm = 44.2924, GNorm = 1.9716, lr_0 = 9.3627e-04
Loss = 1.6860e-01, PNorm = 44.3117, GNorm = 1.9229, lr_0 = 9.3562e-04
Loss = 1.9321e-01, PNorm = 44.3363, GNorm = 4.3405, lr_0 = 9.3498e-04
Loss = 2.1568e-01, PNorm = 44.3648, GNorm = 1.0070, lr_0 = 9.3434e-04
Loss = 1.9487e-01, PNorm = 44.3928, GNorm = 1.3047, lr_0 = 9.3370e-04
Loss = 1.7912e-01, PNorm = 44.4198, GNorm = 1.2384, lr_0 = 9.3306e-04
Loss = 1.7719e-01, PNorm = 44.4461, GNorm = 1.6894, lr_0 = 9.3242e-04
Loss = 1.8249e-01, PNorm = 44.4696, GNorm = 1.2340, lr_0 = 9.3178e-04
Loss = 1.9235e-01, PNorm = 44.4966, GNorm = 1.0236, lr_0 = 9.3115e-04
Loss = 1.8857e-01, PNorm = 44.5307, GNorm = 0.9307, lr_0 = 9.3051e-04
Loss = 1.8835e-01, PNorm = 44.5489, GNorm = 0.8718, lr_0 = 9.2987e-04
Loss = 1.7108e-01, PNorm = 44.5736, GNorm = 3.2341, lr_0 = 9.2923e-04
Loss = 2.1413e-01, PNorm = 44.5962, GNorm = 3.5195, lr_0 = 9.2860e-04
Loss = 2.1123e-01, PNorm = 44.6116, GNorm = 1.2327, lr_0 = 9.2796e-04
Loss = 1.8573e-01, PNorm = 44.6370, GNorm = 4.9269, lr_0 = 9.2733e-04
Loss = 1.9429e-01, PNorm = 44.6709, GNorm = 3.2797, lr_0 = 9.2669e-04
Loss = 1.9773e-01, PNorm = 44.7015, GNorm = 4.3022, lr_0 = 9.2606e-04
Loss = 2.0128e-01, PNorm = 44.7193, GNorm = 1.2416, lr_0 = 9.2542e-04
Loss = 1.5783e-01, PNorm = 44.7426, GNorm = 1.4937, lr_0 = 9.2479e-04
Loss = 1.7383e-01, PNorm = 44.7607, GNorm = 0.9124, lr_0 = 9.2415e-04
Loss = 1.9131e-01, PNorm = 44.7798, GNorm = 7.6934, lr_0 = 9.2352e-04
Loss = 1.9775e-01, PNorm = 44.7994, GNorm = 2.7840, lr_0 = 9.2289e-04
Loss = 1.7878e-01, PNorm = 44.8212, GNorm = 2.0577, lr_0 = 9.2226e-04
Loss = 2.2335e-01, PNorm = 44.8475, GNorm = 2.7057, lr_0 = 9.2162e-04
Loss = 2.3777e-01, PNorm = 44.8759, GNorm = 0.7196, lr_0 = 9.2099e-04
Validation mae = 0.549921
Epoch 3
Loss = 1.9454e-01, PNorm = 44.9054, GNorm = 3.1559, lr_0 = 9.2036e-04
Loss = 1.6072e-01, PNorm = 44.9346, GNorm = 1.8172, lr_0 = 9.1973e-04
Loss = 1.6529e-01, PNorm = 44.9604, GNorm = 0.9608, lr_0 = 9.1910e-04
Loss = 1.6460e-01, PNorm = 44.9838, GNorm = 0.9422, lr_0 = 9.1847e-04
Loss = 1.6467e-01, PNorm = 44.9988, GNorm = 0.9372, lr_0 = 9.1784e-04
Loss = 1.8798e-01, PNorm = 45.0282, GNorm = 1.8492, lr_0 = 9.1721e-04
Loss = 1.6594e-01, PNorm = 45.0585, GNorm = 0.7413, lr_0 = 9.1658e-04
Loss = 1.8139e-01, PNorm = 45.0783, GNorm = 1.5453, lr_0 = 9.1596e-04
Loss = 1.7457e-01, PNorm = 45.1040, GNorm = 1.3816, lr_0 = 9.1533e-04
Loss = 1.6072e-01, PNorm = 45.1299, GNorm = 1.4803, lr_0 = 9.1470e-04
Loss = 1.5802e-01, PNorm = 45.1462, GNorm = 1.8062, lr_0 = 9.1408e-04
Loss = 1.5877e-01, PNorm = 45.1617, GNorm = 1.5224, lr_0 = 9.1345e-04
Loss = 1.9070e-01, PNorm = 45.1790, GNorm = 3.0611, lr_0 = 9.1282e-04
Loss = 1.7559e-01, PNorm = 45.2059, GNorm = 1.3947, lr_0 = 9.1220e-04
Loss = 1.6915e-01, PNorm = 45.2346, GNorm = 1.4124, lr_0 = 9.1157e-04
Loss = 1.5921e-01, PNorm = 45.2533, GNorm = 0.9796, lr_0 = 9.1095e-04
Loss = 1.5976e-01, PNorm = 45.2708, GNorm = 0.8913, lr_0 = 9.1032e-04
Loss = 1.5704e-01, PNorm = 45.2845, GNorm = 0.7688, lr_0 = 9.0970e-04
Loss = 1.7079e-01, PNorm = 45.3018, GNorm = 0.9804, lr_0 = 9.0908e-04
Loss = 1.9285e-01, PNorm = 45.3175, GNorm = 1.0309, lr_0 = 9.0846e-04
Loss = 1.5434e-01, PNorm = 45.3420, GNorm = 1.0880, lr_0 = 9.0783e-04
Loss = 1.3977e-01, PNorm = 45.3637, GNorm = 1.8996, lr_0 = 9.0721e-04
Loss = 1.7046e-01, PNorm = 45.3845, GNorm = 2.2916, lr_0 = 9.0659e-04
Loss = 1.9911e-01, PNorm = 45.4153, GNorm = 3.1285, lr_0 = 9.0597e-04
Loss = 1.4663e-01, PNorm = 45.4343, GNorm = 2.5689, lr_0 = 9.0535e-04
Loss = 1.9479e-01, PNorm = 45.4427, GNorm = 1.1883, lr_0 = 9.0473e-04
Loss = 1.8661e-01, PNorm = 45.4671, GNorm = 1.3132, lr_0 = 9.0411e-04
Loss = 2.1111e-01, PNorm = 45.4933, GNorm = 2.7184, lr_0 = 9.0349e-04
Loss = 1.8202e-01, PNorm = 45.5273, GNorm = 1.5256, lr_0 = 9.0287e-04
Loss = 1.6694e-01, PNorm = 45.5538, GNorm = 2.2052, lr_0 = 9.0225e-04
Loss = 1.7968e-01, PNorm = 45.5748, GNorm = 2.3002, lr_0 = 9.0163e-04
Loss = 1.5813e-01, PNorm = 45.5981, GNorm = 1.8898, lr_0 = 9.0102e-04
Loss = 1.7176e-01, PNorm = 45.6218, GNorm = 1.3188, lr_0 = 9.0040e-04
Loss = 1.8805e-01, PNorm = 45.6429, GNorm = 0.8736, lr_0 = 8.9978e-04
Loss = 1.8624e-01, PNorm = 45.6737, GNorm = 3.3912, lr_0 = 8.9916e-04
Loss = 2.0989e-01, PNorm = 45.7022, GNorm = 1.8280, lr_0 = 8.9855e-04
Loss = 1.6975e-01, PNorm = 45.7318, GNorm = 0.7650, lr_0 = 8.9793e-04
Loss = 1.5140e-01, PNorm = 45.7533, GNorm = 1.0531, lr_0 = 8.9732e-04
Loss = 1.4089e-01, PNorm = 45.7612, GNorm = 1.8133, lr_0 = 8.9670e-04
Loss = 1.4537e-01, PNorm = 45.7831, GNorm = 1.9998, lr_0 = 8.9609e-04
Loss = 1.7859e-01, PNorm = 45.8083, GNorm = 1.0104, lr_0 = 8.9548e-04
Loss = 1.8102e-01, PNorm = 45.8305, GNorm = 2.8270, lr_0 = 8.9486e-04
Loss = 1.5512e-01, PNorm = 45.8459, GNorm = 0.8595, lr_0 = 8.9425e-04
Loss = 1.7940e-01, PNorm = 45.8617, GNorm = 3.1882, lr_0 = 8.9364e-04
Loss = 1.5831e-01, PNorm = 45.8775, GNorm = 1.2996, lr_0 = 8.9302e-04
Loss = 1.7507e-01, PNorm = 45.8940, GNorm = 1.3574, lr_0 = 8.9241e-04
Loss = 1.5491e-01, PNorm = 45.9110, GNorm = 0.9231, lr_0 = 8.9180e-04
Loss = 1.6530e-01, PNorm = 45.9262, GNorm = 0.9379, lr_0 = 8.9119e-04
Loss = 1.6971e-01, PNorm = 45.9454, GNorm = 1.9704, lr_0 = 8.9058e-04
Loss = 1.6905e-01, PNorm = 45.9667, GNorm = 1.7925, lr_0 = 8.8997e-04
Loss = 1.3412e-01, PNorm = 45.9888, GNorm = 1.0659, lr_0 = 8.8936e-04
Loss = 1.6168e-01, PNorm = 46.0010, GNorm = 1.4710, lr_0 = 8.8875e-04
Loss = 1.7409e-01, PNorm = 46.0250, GNorm = 2.7806, lr_0 = 8.8814e-04
Loss = 1.5627e-01, PNorm = 46.0462, GNorm = 1.9080, lr_0 = 8.8753e-04
Loss = 1.5298e-01, PNorm = 46.0731, GNorm = 0.6019, lr_0 = 8.8693e-04
Loss = 1.5933e-01, PNorm = 46.0926, GNorm = 2.2126, lr_0 = 8.8632e-04
Loss = 1.6769e-01, PNorm = 46.1102, GNorm = 0.9126, lr_0 = 8.8571e-04
Loss = 1.5961e-01, PNorm = 46.1366, GNorm = 1.5032, lr_0 = 8.8510e-04
Loss = 1.6373e-01, PNorm = 46.1624, GNorm = 1.2818, lr_0 = 8.8450e-04
Loss = 1.6421e-01, PNorm = 46.1805, GNorm = 0.6373, lr_0 = 8.8389e-04
Loss = 1.9159e-01, PNorm = 46.2062, GNorm = 3.0587, lr_0 = 8.8329e-04
Loss = 1.7288e-01, PNorm = 46.2387, GNorm = 1.6944, lr_0 = 8.8268e-04
Loss = 1.8358e-01, PNorm = 46.2699, GNorm = 1.1094, lr_0 = 8.8208e-04
Loss = 1.6454e-01, PNorm = 46.2862, GNorm = 2.0083, lr_0 = 8.8147e-04
Loss = 1.6392e-01, PNorm = 46.3084, GNorm = 0.8763, lr_0 = 8.8087e-04
Loss = 1.6284e-01, PNorm = 46.3284, GNorm = 1.6962, lr_0 = 8.8026e-04
Loss = 1.9060e-01, PNorm = 46.3571, GNorm = 1.2356, lr_0 = 8.7966e-04
Loss = 1.8880e-01, PNorm = 46.3775, GNorm = 3.4117, lr_0 = 8.7906e-04
Loss = 1.7155e-01, PNorm = 46.4062, GNorm = 1.0533, lr_0 = 8.7846e-04
Loss = 1.5025e-01, PNorm = 46.4304, GNorm = 1.1585, lr_0 = 8.7785e-04
Loss = 1.6804e-01, PNorm = 46.4499, GNorm = 1.3650, lr_0 = 8.7725e-04
Loss = 1.8057e-01, PNorm = 46.4629, GNorm = 2.1339, lr_0 = 8.7665e-04
Loss = 1.6316e-01, PNorm = 46.4774, GNorm = 1.4604, lr_0 = 8.7605e-04
Loss = 2.1318e-01, PNorm = 46.5044, GNorm = 1.1238, lr_0 = 8.7545e-04
Loss = 1.6506e-01, PNorm = 46.5363, GNorm = 1.8889, lr_0 = 8.7485e-04
Loss = 1.5290e-01, PNorm = 46.5578, GNorm = 0.7701, lr_0 = 8.7425e-04
Loss = 1.4937e-01, PNorm = 46.5760, GNorm = 0.8310, lr_0 = 8.7365e-04
Loss = 1.5143e-01, PNorm = 46.5884, GNorm = 0.9912, lr_0 = 8.7306e-04
Loss = 1.6751e-01, PNorm = 46.6037, GNorm = 0.9958, lr_0 = 8.7246e-04
Loss = 1.6401e-01, PNorm = 46.6231, GNorm = 3.6832, lr_0 = 8.7186e-04
Loss = 1.7198e-01, PNorm = 46.6396, GNorm = 2.0094, lr_0 = 8.7126e-04
Loss = 1.4212e-01, PNorm = 46.6564, GNorm = 0.6128, lr_0 = 8.7067e-04
Loss = 1.7146e-01, PNorm = 46.6776, GNorm = 0.8148, lr_0 = 8.7007e-04
Loss = 1.5840e-01, PNorm = 46.7007, GNorm = 1.7093, lr_0 = 8.6947e-04
Loss = 1.6520e-01, PNorm = 46.7177, GNorm = 0.9040, lr_0 = 8.6888e-04
Loss = 1.7330e-01, PNorm = 46.7354, GNorm = 0.6647, lr_0 = 8.6828e-04
Loss = 1.7777e-01, PNorm = 46.7500, GNorm = 1.8755, lr_0 = 8.6769e-04
Loss = 1.5034e-01, PNorm = 46.7762, GNorm = 1.5977, lr_0 = 8.6709e-04
Loss = 1.6397e-01, PNorm = 46.7974, GNorm = 1.2457, lr_0 = 8.6650e-04
Loss = 1.6760e-01, PNorm = 46.8168, GNorm = 1.1686, lr_0 = 8.6590e-04
Loss = 1.6666e-01, PNorm = 46.8426, GNorm = 1.6430, lr_0 = 8.6531e-04
Loss = 1.6228e-01, PNorm = 46.8661, GNorm = 1.3231, lr_0 = 8.6472e-04
Loss = 1.7028e-01, PNorm = 46.8945, GNorm = 0.6617, lr_0 = 8.6413e-04
Loss = 1.8196e-01, PNorm = 46.9159, GNorm = 1.3553, lr_0 = 8.6353e-04
Loss = 1.7091e-01, PNorm = 46.9329, GNorm = 0.8041, lr_0 = 8.6294e-04
Loss = 1.6943e-01, PNorm = 46.9574, GNorm = 2.1223, lr_0 = 8.6235e-04
Loss = 1.9658e-01, PNorm = 46.9796, GNorm = 1.7311, lr_0 = 8.6176e-04
Loss = 1.4240e-01, PNorm = 46.9981, GNorm = 1.5139, lr_0 = 8.6117e-04
Loss = 1.4835e-01, PNorm = 47.0174, GNorm = 0.7781, lr_0 = 8.6058e-04
Loss = 1.6984e-01, PNorm = 47.0377, GNorm = 1.3596, lr_0 = 8.5999e-04
Loss = 1.6090e-01, PNorm = 47.0542, GNorm = 1.7041, lr_0 = 8.5940e-04
Loss = 1.6434e-01, PNorm = 47.0747, GNorm = 1.4264, lr_0 = 8.5881e-04
Loss = 1.6098e-01, PNorm = 47.0908, GNorm = 1.1639, lr_0 = 8.5823e-04
Loss = 1.6700e-01, PNorm = 47.1084, GNorm = 1.9965, lr_0 = 8.5764e-04
Loss = 1.8383e-01, PNorm = 47.1254, GNorm = 1.6902, lr_0 = 8.5705e-04
Loss = 1.9568e-01, PNorm = 47.1574, GNorm = 2.0222, lr_0 = 8.5646e-04
Loss = 1.9956e-01, PNorm = 47.1816, GNorm = 2.5481, lr_0 = 8.5588e-04
Loss = 1.9320e-01, PNorm = 47.2065, GNorm = 2.7239, lr_0 = 8.5529e-04
Loss = 1.7736e-01, PNorm = 47.2388, GNorm = 0.9623, lr_0 = 8.5470e-04
Loss = 1.8359e-01, PNorm = 47.2681, GNorm = 1.2505, lr_0 = 8.5412e-04
Loss = 1.5401e-01, PNorm = 47.2861, GNorm = 1.6845, lr_0 = 8.5353e-04
Loss = 1.6609e-01, PNorm = 47.2945, GNorm = 1.7895, lr_0 = 8.5295e-04
Loss = 1.8166e-01, PNorm = 47.3109, GNorm = 1.0025, lr_0 = 8.5236e-04
Loss = 1.7368e-01, PNorm = 47.3372, GNorm = 0.7260, lr_0 = 8.5178e-04
Loss = 1.2457e-01, PNorm = 47.3633, GNorm = 0.7049, lr_0 = 8.5120e-04
Loss = 1.6878e-01, PNorm = 47.3897, GNorm = 1.9476, lr_0 = 8.5061e-04
Loss = 1.3869e-01, PNorm = 47.3997, GNorm = 0.9016, lr_0 = 8.5003e-04
Loss = 1.5990e-01, PNorm = 47.4123, GNorm = 0.8740, lr_0 = 8.4945e-04
Loss = 1.7030e-01, PNorm = 47.4306, GNorm = 1.6945, lr_0 = 8.4887e-04
Loss = 1.6131e-01, PNorm = 47.4547, GNorm = 0.8090, lr_0 = 8.4828e-04
Validation mae = 0.559765
Epoch 4
Loss = 1.5983e-01, PNorm = 47.4755, GNorm = 1.1329, lr_0 = 8.4770e-04
Loss = 1.8152e-01, PNorm = 47.5099, GNorm = 1.1466, lr_0 = 8.4712e-04
Loss = 1.3953e-01, PNorm = 47.5326, GNorm = 0.7421, lr_0 = 8.4654e-04
Loss = 1.3920e-01, PNorm = 47.5533, GNorm = 0.7938, lr_0 = 8.4596e-04
Loss = 1.8103e-01, PNorm = 47.5718, GNorm = 2.3322, lr_0 = 8.4538e-04
Loss = 1.6170e-01, PNorm = 47.5993, GNorm = 0.7238, lr_0 = 8.4480e-04
Loss = 1.5432e-01, PNorm = 47.6271, GNorm = 1.1604, lr_0 = 8.4423e-04
Loss = 1.6201e-01, PNorm = 47.6535, GNorm = 2.2700, lr_0 = 8.4365e-04
Loss = 1.8686e-01, PNorm = 47.6741, GNorm = 0.7736, lr_0 = 8.4307e-04
Loss = 1.5675e-01, PNorm = 47.6941, GNorm = 1.2886, lr_0 = 8.4249e-04
Loss = 1.7424e-01, PNorm = 47.7170, GNorm = 2.5327, lr_0 = 8.4191e-04
Loss = 1.6702e-01, PNorm = 47.7445, GNorm = 0.9891, lr_0 = 8.4134e-04
Loss = 1.5536e-01, PNorm = 47.7683, GNorm = 1.3046, lr_0 = 8.4076e-04
Loss = 1.4252e-01, PNorm = 47.7884, GNorm = 1.3331, lr_0 = 8.4019e-04
Loss = 1.3745e-01, PNorm = 47.8011, GNorm = 1.0024, lr_0 = 8.3961e-04
Loss = 1.6862e-01, PNorm = 47.8169, GNorm = 1.6010, lr_0 = 8.3903e-04
Loss = 1.3828e-01, PNorm = 47.8333, GNorm = 0.9001, lr_0 = 8.3846e-04
Loss = 1.4709e-01, PNorm = 47.8520, GNorm = 1.6890, lr_0 = 8.3789e-04
Loss = 1.3461e-01, PNorm = 47.8716, GNorm = 0.9992, lr_0 = 8.3731e-04
Loss = 1.4404e-01, PNorm = 47.8906, GNorm = 0.8872, lr_0 = 8.3674e-04
Loss = 1.5445e-01, PNorm = 47.9081, GNorm = 0.6105, lr_0 = 8.3616e-04
Loss = 1.3441e-01, PNorm = 47.9294, GNorm = 0.6580, lr_0 = 8.3559e-04
Loss = 1.6323e-01, PNorm = 47.9436, GNorm = 0.9997, lr_0 = 8.3502e-04
Loss = 1.6527e-01, PNorm = 47.9546, GNorm = 1.6315, lr_0 = 8.3445e-04
Loss = 1.5430e-01, PNorm = 47.9705, GNorm = 1.6608, lr_0 = 8.3388e-04
Loss = 1.5485e-01, PNorm = 47.9886, GNorm = 0.8776, lr_0 = 8.3330e-04
Loss = 1.4112e-01, PNorm = 48.0075, GNorm = 0.7492, lr_0 = 8.3273e-04
Loss = 1.6028e-01, PNorm = 48.0275, GNorm = 0.9005, lr_0 = 8.3216e-04
Loss = 1.4283e-01, PNorm = 48.0547, GNorm = 0.8743, lr_0 = 8.3159e-04
Loss = 1.7890e-01, PNorm = 48.0763, GNorm = 2.5012, lr_0 = 8.3102e-04
Loss = 1.5028e-01, PNorm = 48.0963, GNorm = 1.0672, lr_0 = 8.3045e-04
Loss = 1.3881e-01, PNorm = 48.1150, GNorm = 0.8046, lr_0 = 8.2988e-04
Loss = 1.4597e-01, PNorm = 48.1335, GNorm = 1.5786, lr_0 = 8.2932e-04
Loss = 1.4287e-01, PNorm = 48.1480, GNorm = 0.9046, lr_0 = 8.2875e-04
Loss = 1.4275e-01, PNorm = 48.1672, GNorm = 3.0158, lr_0 = 8.2818e-04
Loss = 1.4401e-01, PNorm = 48.1823, GNorm = 0.7089, lr_0 = 8.2761e-04
Loss = 1.6483e-01, PNorm = 48.1944, GNorm = 0.6822, lr_0 = 8.2705e-04
Loss = 1.6362e-01, PNorm = 48.2104, GNorm = 0.7428, lr_0 = 8.2648e-04
Loss = 1.2208e-01, PNorm = 48.2306, GNorm = 1.8222, lr_0 = 8.2591e-04
Loss = 1.5388e-01, PNorm = 48.2505, GNorm = 1.1342, lr_0 = 8.2535e-04
Loss = 1.4840e-01, PNorm = 48.2805, GNorm = 1.2285, lr_0 = 8.2478e-04
Loss = 1.3664e-01, PNorm = 48.2942, GNorm = 1.1172, lr_0 = 8.2422e-04
Loss = 1.5082e-01, PNorm = 48.3050, GNorm = 0.7125, lr_0 = 8.2365e-04
Loss = 1.6570e-01, PNorm = 48.3195, GNorm = 0.7400, lr_0 = 8.2309e-04
Loss = 1.4381e-01, PNorm = 48.3340, GNorm = 3.1625, lr_0 = 8.2252e-04
Loss = 1.4185e-01, PNorm = 48.3548, GNorm = 0.7145, lr_0 = 8.2196e-04
Loss = 1.4593e-01, PNorm = 48.3789, GNorm = 1.8266, lr_0 = 8.2140e-04
Loss = 1.5160e-01, PNorm = 48.3962, GNorm = 0.9401, lr_0 = 8.2084e-04
Loss = 1.5183e-01, PNorm = 48.4144, GNorm = 1.1830, lr_0 = 8.2027e-04
Loss = 1.6635e-01, PNorm = 48.4268, GNorm = 2.0620, lr_0 = 8.1971e-04
Loss = 1.3405e-01, PNorm = 48.4505, GNorm = 1.3732, lr_0 = 8.1915e-04
Loss = 1.3136e-01, PNorm = 48.4728, GNorm = 0.8970, lr_0 = 8.1859e-04
Loss = 1.4636e-01, PNorm = 48.4943, GNorm = 1.2632, lr_0 = 8.1803e-04
Loss = 1.6742e-01, PNorm = 48.5141, GNorm = 0.9100, lr_0 = 8.1747e-04
Loss = 1.3982e-01, PNorm = 48.5305, GNorm = 0.9153, lr_0 = 8.1691e-04
Loss = 1.6309e-01, PNorm = 48.5460, GNorm = 2.0441, lr_0 = 8.1635e-04
Loss = 1.4679e-01, PNorm = 48.5647, GNorm = 0.7258, lr_0 = 8.1579e-04
Loss = 1.3089e-01, PNorm = 48.5791, GNorm = 1.0637, lr_0 = 8.1523e-04
Loss = 1.4661e-01, PNorm = 48.6029, GNorm = 0.8348, lr_0 = 8.1467e-04
Loss = 1.4773e-01, PNorm = 48.6163, GNorm = 0.8899, lr_0 = 8.1411e-04
Loss = 1.3797e-01, PNorm = 48.6290, GNorm = 0.9474, lr_0 = 8.1355e-04
Loss = 1.7108e-01, PNorm = 48.6510, GNorm = 2.6297, lr_0 = 8.1300e-04
Loss = 1.6234e-01, PNorm = 48.6726, GNorm = 1.1775, lr_0 = 8.1244e-04
Loss = 1.7103e-01, PNorm = 48.6953, GNorm = 1.8663, lr_0 = 8.1188e-04
Loss = 1.4771e-01, PNorm = 48.7202, GNorm = 1.6126, lr_0 = 8.1133e-04
Loss = 1.4665e-01, PNorm = 48.7479, GNorm = 0.9635, lr_0 = 8.1077e-04
Loss = 1.5671e-01, PNorm = 48.7688, GNorm = 1.1131, lr_0 = 8.1022e-04
Loss = 1.4066e-01, PNorm = 48.7912, GNorm = 0.6958, lr_0 = 8.0966e-04
Loss = 1.7899e-01, PNorm = 48.8152, GNorm = 0.8468, lr_0 = 8.0911e-04
Loss = 1.5870e-01, PNorm = 48.8361, GNorm = 1.0202, lr_0 = 8.0855e-04
Loss = 1.4849e-01, PNorm = 48.8599, GNorm = 0.6770, lr_0 = 8.0800e-04
Loss = 1.4790e-01, PNorm = 48.8825, GNorm = 1.6094, lr_0 = 8.0745e-04
Loss = 1.3991e-01, PNorm = 48.8966, GNorm = 2.0034, lr_0 = 8.0689e-04
Loss = 1.3059e-01, PNorm = 48.9147, GNorm = 0.7957, lr_0 = 8.0634e-04
Loss = 1.3787e-01, PNorm = 48.9289, GNorm = 1.7213, lr_0 = 8.0579e-04
Loss = 1.4290e-01, PNorm = 48.9454, GNorm = 2.3933, lr_0 = 8.0523e-04
Loss = 1.4562e-01, PNorm = 48.9691, GNorm = 1.0575, lr_0 = 8.0468e-04
Loss = 1.4627e-01, PNorm = 48.9950, GNorm = 0.9701, lr_0 = 8.0413e-04
Loss = 1.5632e-01, PNorm = 49.0103, GNorm = 1.1261, lr_0 = 8.0358e-04
Loss = 1.4507e-01, PNorm = 49.0325, GNorm = 0.9188, lr_0 = 8.0303e-04
Loss = 1.6059e-01, PNorm = 49.0542, GNorm = 1.6094, lr_0 = 8.0248e-04
Loss = 1.4646e-01, PNorm = 49.0769, GNorm = 1.9673, lr_0 = 8.0193e-04
Loss = 1.5286e-01, PNorm = 49.0973, GNorm = 1.2775, lr_0 = 8.0138e-04
Loss = 1.8526e-01, PNorm = 49.1235, GNorm = 0.6405, lr_0 = 8.0083e-04
Loss = 1.5784e-01, PNorm = 49.1406, GNorm = 1.3097, lr_0 = 8.0028e-04
Loss = 1.3574e-01, PNorm = 49.1578, GNorm = 1.6998, lr_0 = 7.9974e-04
Loss = 1.6268e-01, PNorm = 49.1793, GNorm = 2.9161, lr_0 = 7.9919e-04
Loss = 1.5157e-01, PNorm = 49.2093, GNorm = 0.8198, lr_0 = 7.9864e-04
Loss = 1.2382e-01, PNorm = 49.2256, GNorm = 1.1100, lr_0 = 7.9809e-04
Loss = 1.2802e-01, PNorm = 49.2365, GNorm = 0.7933, lr_0 = 7.9755e-04
Loss = 1.3295e-01, PNorm = 49.2490, GNorm = 1.5908, lr_0 = 7.9700e-04
Loss = 1.4119e-01, PNorm = 49.2644, GNorm = 1.2044, lr_0 = 7.9645e-04
Loss = 1.3973e-01, PNorm = 49.2821, GNorm = 1.3973, lr_0 = 7.9591e-04
Loss = 1.5821e-01, PNorm = 49.2940, GNorm = 2.5975, lr_0 = 7.9536e-04
Loss = 1.4360e-01, PNorm = 49.3129, GNorm = 1.6010, lr_0 = 7.9482e-04
Loss = 1.6988e-01, PNorm = 49.3347, GNorm = 2.6442, lr_0 = 7.9427e-04
Loss = 1.9367e-01, PNorm = 49.3622, GNorm = 1.0518, lr_0 = 7.9373e-04
Loss = 1.5156e-01, PNorm = 49.3830, GNorm = 0.8917, lr_0 = 7.9319e-04
Loss = 1.5373e-01, PNorm = 49.4050, GNorm = 1.7274, lr_0 = 7.9264e-04
Loss = 1.4985e-01, PNorm = 49.4218, GNorm = 1.2175, lr_0 = 7.9210e-04
Loss = 1.3930e-01, PNorm = 49.4391, GNorm = 1.3837, lr_0 = 7.9156e-04
Loss = 1.6267e-01, PNorm = 49.4632, GNorm = 1.0957, lr_0 = 7.9101e-04
Loss = 1.5392e-01, PNorm = 49.4815, GNorm = 2.0449, lr_0 = 7.9047e-04
Loss = 1.4838e-01, PNorm = 49.5020, GNorm = 1.1006, lr_0 = 7.8993e-04
Loss = 1.6065e-01, PNorm = 49.5219, GNorm = 2.1264, lr_0 = 7.8939e-04
Loss = 1.7264e-01, PNorm = 49.5408, GNorm = 1.7025, lr_0 = 7.8885e-04
Loss = 1.6484e-01, PNorm = 49.5534, GNorm = 1.4757, lr_0 = 7.8831e-04
Loss = 1.5646e-01, PNorm = 49.5714, GNorm = 0.9913, lr_0 = 7.8777e-04
Loss = 1.4222e-01, PNorm = 49.5945, GNorm = 1.7825, lr_0 = 7.8723e-04
Loss = 1.3640e-01, PNorm = 49.6098, GNorm = 1.0905, lr_0 = 7.8669e-04
Loss = 1.5735e-01, PNorm = 49.6289, GNorm = 0.5681, lr_0 = 7.8615e-04
Loss = 1.5829e-01, PNorm = 49.6590, GNorm = 1.3807, lr_0 = 7.8561e-04
Loss = 1.5069e-01, PNorm = 49.6786, GNorm = 0.7142, lr_0 = 7.8507e-04
Loss = 1.6140e-01, PNorm = 49.6946, GNorm = 0.8026, lr_0 = 7.8454e-04
Loss = 1.0944e-01, PNorm = 49.7102, GNorm = 1.1393, lr_0 = 7.8400e-04
Loss = 1.2410e-01, PNorm = 49.7198, GNorm = 0.9834, lr_0 = 7.8346e-04
Loss = 1.6286e-01, PNorm = 49.7399, GNorm = 2.6978, lr_0 = 7.8293e-04
Loss = 1.8012e-01, PNorm = 49.7629, GNorm = 2.8429, lr_0 = 7.8239e-04
Loss = 1.5840e-01, PNorm = 49.7816, GNorm = 0.8919, lr_0 = 7.8185e-04
Loss = 1.6321e-01, PNorm = 49.8023, GNorm = 0.8251, lr_0 = 7.8132e-04
Validation mae = 0.443687
Epoch 5
Loss = 1.3608e-01, PNorm = 49.8233, GNorm = 0.8792, lr_0 = 7.8078e-04
Loss = 1.6750e-01, PNorm = 49.8445, GNorm = 1.9478, lr_0 = 7.8025e-04
Loss = 1.4618e-01, PNorm = 49.8658, GNorm = 0.8317, lr_0 = 7.7971e-04
Loss = 1.4604e-01, PNorm = 49.8832, GNorm = 0.8519, lr_0 = 7.7918e-04
Loss = 1.0341e-01, PNorm = 49.8949, GNorm = 0.5405, lr_0 = 7.7864e-04
Loss = 1.2693e-01, PNorm = 49.9062, GNorm = 0.9078, lr_0 = 7.7811e-04
Loss = 1.5593e-01, PNorm = 49.9323, GNorm = 2.0942, lr_0 = 7.7758e-04
Loss = 1.8067e-01, PNorm = 49.9634, GNorm = 1.2425, lr_0 = 7.7705e-04
Loss = 1.3651e-01, PNorm = 49.9910, GNorm = 1.5426, lr_0 = 7.7651e-04
Loss = 1.1868e-01, PNorm = 50.0047, GNorm = 1.0038, lr_0 = 7.7598e-04
Loss = 1.2765e-01, PNorm = 50.0147, GNorm = 0.8916, lr_0 = 7.7545e-04
Loss = 1.3167e-01, PNorm = 50.0371, GNorm = 0.7747, lr_0 = 7.7492e-04
Loss = 1.4468e-01, PNorm = 50.0523, GNorm = 1.1174, lr_0 = 7.7439e-04
Loss = 1.4536e-01, PNorm = 50.0756, GNorm = 1.7629, lr_0 = 7.7386e-04
Loss = 1.3852e-01, PNorm = 50.0983, GNorm = 1.0721, lr_0 = 7.7333e-04
Loss = 1.2312e-01, PNorm = 50.1134, GNorm = 0.6634, lr_0 = 7.7280e-04
Loss = 1.4431e-01, PNorm = 50.1344, GNorm = 1.8502, lr_0 = 7.7227e-04
Loss = 1.6406e-01, PNorm = 50.1629, GNorm = 1.3626, lr_0 = 7.7174e-04
Loss = 1.5521e-01, PNorm = 50.1834, GNorm = 1.4993, lr_0 = 7.7121e-04
Loss = 1.6327e-01, PNorm = 50.2024, GNorm = 1.4790, lr_0 = 7.7068e-04
Loss = 1.5720e-01, PNorm = 50.2297, GNorm = 1.2872, lr_0 = 7.7015e-04
Loss = 1.4724e-01, PNorm = 50.2508, GNorm = 0.9715, lr_0 = 7.6963e-04
Loss = 1.3474e-01, PNorm = 50.2717, GNorm = 0.8060, lr_0 = 7.6910e-04
Loss = 1.2945e-01, PNorm = 50.2883, GNorm = 0.9454, lr_0 = 7.6857e-04
Loss = 1.2434e-01, PNorm = 50.3145, GNorm = 1.0540, lr_0 = 7.6805e-04
Loss = 1.3327e-01, PNorm = 50.3366, GNorm = 1.4549, lr_0 = 7.6752e-04
Loss = 1.6504e-01, PNorm = 50.3588, GNorm = 2.4316, lr_0 = 7.6699e-04
Loss = 1.7480e-01, PNorm = 50.3868, GNorm = 2.9737, lr_0 = 7.6647e-04
Loss = 1.6507e-01, PNorm = 50.4160, GNorm = 2.7806, lr_0 = 7.6594e-04
Loss = 1.4821e-01, PNorm = 50.4484, GNorm = 1.1847, lr_0 = 7.6542e-04
Loss = 1.3374e-01, PNorm = 50.4752, GNorm = 1.2112, lr_0 = 7.6489e-04
Loss = 1.3863e-01, PNorm = 50.4867, GNorm = 0.7286, lr_0 = 7.6437e-04
Loss = 1.3779e-01, PNorm = 50.5057, GNorm = 0.9536, lr_0 = 7.6385e-04
Loss = 1.2541e-01, PNorm = 50.5262, GNorm = 1.1529, lr_0 = 7.6332e-04
Loss = 1.3002e-01, PNorm = 50.5528, GNorm = 1.4796, lr_0 = 7.6280e-04
Loss = 1.3563e-01, PNorm = 50.5751, GNorm = 1.0612, lr_0 = 7.6228e-04
Loss = 1.3455e-01, PNorm = 50.5887, GNorm = 1.2288, lr_0 = 7.6176e-04
Loss = 1.4351e-01, PNorm = 50.6068, GNorm = 0.8562, lr_0 = 7.6123e-04
Loss = 1.3103e-01, PNorm = 50.6285, GNorm = 0.9645, lr_0 = 7.6071e-04
Loss = 1.3531e-01, PNorm = 50.6538, GNorm = 1.3355, lr_0 = 7.6019e-04
Loss = 1.4580e-01, PNorm = 50.6734, GNorm = 1.7119, lr_0 = 7.5967e-04
Loss = 1.3517e-01, PNorm = 50.6923, GNorm = 0.7741, lr_0 = 7.5915e-04
Loss = 1.2945e-01, PNorm = 50.7061, GNorm = 0.6781, lr_0 = 7.5863e-04
Loss = 1.4156e-01, PNorm = 50.7171, GNorm = 1.9129, lr_0 = 7.5811e-04
Loss = 1.4718e-01, PNorm = 50.7280, GNorm = 0.5597, lr_0 = 7.5759e-04
Loss = 1.5312e-01, PNorm = 50.7452, GNorm = 0.7686, lr_0 = 7.5707e-04
Loss = 1.3779e-01, PNorm = 50.7671, GNorm = 1.6947, lr_0 = 7.5655e-04
Loss = 1.3389e-01, PNorm = 50.7838, GNorm = 0.8911, lr_0 = 7.5603e-04
Loss = 1.5264e-01, PNorm = 50.8153, GNorm = 0.8779, lr_0 = 7.5552e-04
Loss = 1.4832e-01, PNorm = 50.8307, GNorm = 1.3404, lr_0 = 7.5500e-04
Loss = 1.6782e-01, PNorm = 50.8487, GNorm = 0.7644, lr_0 = 7.5448e-04
Loss = 1.2265e-01, PNorm = 50.8687, GNorm = 0.8211, lr_0 = 7.5397e-04
Loss = 1.4312e-01, PNorm = 50.8859, GNorm = 2.2074, lr_0 = 7.5345e-04
Loss = 1.6771e-01, PNorm = 50.8998, GNorm = 1.3455, lr_0 = 7.5293e-04
Loss = 1.4741e-01, PNorm = 50.9210, GNorm = 1.9077, lr_0 = 7.5242e-04
Loss = 1.6223e-01, PNorm = 50.9381, GNorm = 1.2687, lr_0 = 7.5190e-04
Loss = 1.3333e-01, PNorm = 50.9528, GNorm = 0.6485, lr_0 = 7.5139e-04
Loss = 1.3523e-01, PNorm = 50.9697, GNorm = 1.1423, lr_0 = 7.5087e-04
Loss = 1.2292e-01, PNorm = 50.9813, GNorm = 1.7670, lr_0 = 7.5036e-04
Loss = 1.3349e-01, PNorm = 50.9950, GNorm = 0.8177, lr_0 = 7.4984e-04
Loss = 1.5482e-01, PNorm = 51.0175, GNorm = 1.4957, lr_0 = 7.4933e-04
Loss = 1.3969e-01, PNorm = 51.0382, GNorm = 0.7289, lr_0 = 7.4882e-04
Loss = 1.3856e-01, PNorm = 51.0580, GNorm = 0.7062, lr_0 = 7.4830e-04
Loss = 1.3600e-01, PNorm = 51.0760, GNorm = 0.8792, lr_0 = 7.4779e-04
Loss = 1.3920e-01, PNorm = 51.0902, GNorm = 1.3643, lr_0 = 7.4728e-04
Loss = 1.2531e-01, PNorm = 51.1067, GNorm = 0.5240, lr_0 = 7.4677e-04
Loss = 1.4371e-01, PNorm = 51.1280, GNorm = 0.9406, lr_0 = 7.4625e-04
Loss = 1.3521e-01, PNorm = 51.1441, GNorm = 0.7375, lr_0 = 7.4574e-04
Loss = 1.3961e-01, PNorm = 51.1555, GNorm = 0.6087, lr_0 = 7.4523e-04
Loss = 1.2812e-01, PNorm = 51.1721, GNorm = 0.4999, lr_0 = 7.4472e-04
Loss = 1.5288e-01, PNorm = 51.1894, GNorm = 1.5936, lr_0 = 7.4421e-04
Loss = 1.2854e-01, PNorm = 51.2065, GNorm = 0.8687, lr_0 = 7.4370e-04
Loss = 1.3290e-01, PNorm = 51.2205, GNorm = 1.0504, lr_0 = 7.4319e-04
Loss = 1.6917e-01, PNorm = 51.2421, GNorm = 1.5100, lr_0 = 7.4268e-04
Loss = 1.3637e-01, PNorm = 51.2698, GNorm = 1.1333, lr_0 = 7.4217e-04
Loss = 1.5651e-01, PNorm = 51.2954, GNorm = 1.7481, lr_0 = 7.4167e-04
Loss = 1.5452e-01, PNorm = 51.3200, GNorm = 0.9103, lr_0 = 7.4116e-04
Loss = 1.5822e-01, PNorm = 51.3400, GNorm = 1.3771, lr_0 = 7.4065e-04
Loss = 1.5217e-01, PNorm = 51.3594, GNorm = 1.1774, lr_0 = 7.4014e-04
Loss = 1.2774e-01, PNorm = 51.3853, GNorm = 1.6597, lr_0 = 7.3964e-04
Loss = 1.4055e-01, PNorm = 51.4035, GNorm = 0.6187, lr_0 = 7.3913e-04
Loss = 1.6536e-01, PNorm = 51.4268, GNorm = 1.0434, lr_0 = 7.3862e-04
Loss = 1.2674e-01, PNorm = 51.4429, GNorm = 0.6656, lr_0 = 7.3812e-04
Loss = 1.2304e-01, PNorm = 51.4535, GNorm = 0.7746, lr_0 = 7.3761e-04
Loss = 1.1628e-01, PNorm = 51.4679, GNorm = 0.5250, lr_0 = 7.3711e-04
Loss = 1.7618e-01, PNorm = 51.4852, GNorm = 1.9366, lr_0 = 7.3660e-04
Loss = 1.3134e-01, PNorm = 51.5081, GNorm = 1.3105, lr_0 = 7.3610e-04
Loss = 1.5367e-01, PNorm = 51.5232, GNorm = 0.8232, lr_0 = 7.3559e-04
Loss = 1.4014e-01, PNorm = 51.5461, GNorm = 0.9799, lr_0 = 7.3509e-04
Loss = 1.4104e-01, PNorm = 51.5666, GNorm = 1.1200, lr_0 = 7.3458e-04
Loss = 1.4794e-01, PNorm = 51.5884, GNorm = 1.0022, lr_0 = 7.3408e-04
Loss = 1.4495e-01, PNorm = 51.6045, GNorm = 1.1923, lr_0 = 7.3358e-04
Loss = 1.4405e-01, PNorm = 51.6228, GNorm = 1.6177, lr_0 = 7.3308e-04
Loss = 1.2817e-01, PNorm = 51.6393, GNorm = 1.4239, lr_0 = 7.3257e-04
Loss = 1.3839e-01, PNorm = 51.6556, GNorm = 2.1863, lr_0 = 7.3207e-04
Loss = 1.4192e-01, PNorm = 51.6733, GNorm = 0.8266, lr_0 = 7.3157e-04
Loss = 1.4021e-01, PNorm = 51.6874, GNorm = 1.0977, lr_0 = 7.3107e-04
Loss = 1.2078e-01, PNorm = 51.6976, GNorm = 0.5666, lr_0 = 7.3057e-04
Loss = 1.5978e-01, PNorm = 51.7183, GNorm = 2.1908, lr_0 = 7.3007e-04
Loss = 1.4668e-01, PNorm = 51.7447, GNorm = 0.6877, lr_0 = 7.2957e-04
Loss = 1.4128e-01, PNorm = 51.7640, GNorm = 0.7048, lr_0 = 7.2907e-04
Loss = 1.5466e-01, PNorm = 51.7901, GNorm = 1.5796, lr_0 = 7.2857e-04
Loss = 1.3621e-01, PNorm = 51.8095, GNorm = 1.0404, lr_0 = 7.2807e-04
Loss = 1.3739e-01, PNorm = 51.8244, GNorm = 1.1161, lr_0 = 7.2757e-04
Loss = 1.3974e-01, PNorm = 51.8342, GNorm = 0.6678, lr_0 = 7.2707e-04
Loss = 1.5810e-01, PNorm = 51.8468, GNorm = 2.4141, lr_0 = 7.2657e-04
Loss = 1.3220e-01, PNorm = 51.8659, GNorm = 1.1454, lr_0 = 7.2608e-04
Loss = 1.5322e-01, PNorm = 51.8884, GNorm = 1.0030, lr_0 = 7.2558e-04
Loss = 1.3311e-01, PNorm = 51.9056, GNorm = 0.7167, lr_0 = 7.2508e-04
Loss = 1.2205e-01, PNorm = 51.9242, GNorm = 0.9838, lr_0 = 7.2458e-04
Loss = 1.2594e-01, PNorm = 51.9401, GNorm = 0.8259, lr_0 = 7.2409e-04
Loss = 1.3145e-01, PNorm = 51.9556, GNorm = 0.7036, lr_0 = 7.2359e-04
Loss = 1.4037e-01, PNorm = 51.9631, GNorm = 1.1781, lr_0 = 7.2310e-04
Loss = 1.5407e-01, PNorm = 51.9789, GNorm = 1.9702, lr_0 = 7.2260e-04
Loss = 1.3426e-01, PNorm = 52.0081, GNorm = 0.6191, lr_0 = 7.2211e-04
Loss = 1.2064e-01, PNorm = 52.0289, GNorm = 1.0586, lr_0 = 7.2161e-04
Loss = 1.5955e-01, PNorm = 52.0398, GNorm = 1.5015, lr_0 = 7.2112e-04
Loss = 1.2866e-01, PNorm = 52.0557, GNorm = 0.8675, lr_0 = 7.2062e-04
Loss = 1.4598e-01, PNorm = 52.0706, GNorm = 0.7438, lr_0 = 7.2013e-04
Loss = 1.1730e-01, PNorm = 52.0815, GNorm = 1.0021, lr_0 = 7.1964e-04
Validation mae = 0.438907
Epoch 6
Loss = 1.2781e-01, PNorm = 52.1013, GNorm = 1.3865, lr_0 = 7.1914e-04
Loss = 1.3737e-01, PNorm = 52.1285, GNorm = 0.7456, lr_0 = 7.1865e-04
Loss = 1.5594e-01, PNorm = 52.1549, GNorm = 0.9821, lr_0 = 7.1816e-04
Loss = 1.1475e-01, PNorm = 52.1803, GNorm = 0.9014, lr_0 = 7.1767e-04
Loss = 1.3503e-01, PNorm = 52.1985, GNorm = 0.9638, lr_0 = 7.1717e-04
Loss = 1.3084e-01, PNorm = 52.2123, GNorm = 2.3936, lr_0 = 7.1668e-04
Loss = 1.1823e-01, PNorm = 52.2257, GNorm = 1.0718, lr_0 = 7.1619e-04
Loss = 1.2470e-01, PNorm = 52.2424, GNorm = 0.6690, lr_0 = 7.1570e-04
Loss = 1.4965e-01, PNorm = 52.2580, GNorm = 3.2708, lr_0 = 7.1521e-04
Loss = 1.1559e-01, PNorm = 52.2780, GNorm = 1.3068, lr_0 = 7.1472e-04
Loss = 1.1221e-01, PNorm = 52.2968, GNorm = 1.1475, lr_0 = 7.1423e-04
Loss = 1.3351e-01, PNorm = 52.3141, GNorm = 0.8535, lr_0 = 7.1374e-04
Loss = 1.2097e-01, PNorm = 52.3345, GNorm = 1.3344, lr_0 = 7.1325e-04
Loss = 1.2470e-01, PNorm = 52.3574, GNorm = 1.5752, lr_0 = 7.1277e-04
Loss = 1.3336e-01, PNorm = 52.3753, GNorm = 0.7188, lr_0 = 7.1228e-04
Loss = 1.4359e-01, PNorm = 52.3891, GNorm = 0.6815, lr_0 = 7.1179e-04
Loss = 1.3486e-01, PNorm = 52.4056, GNorm = 0.8075, lr_0 = 7.1130e-04
Loss = 1.2326e-01, PNorm = 52.4271, GNorm = 0.5847, lr_0 = 7.1081e-04
Loss = 1.3224e-01, PNorm = 52.4475, GNorm = 0.8777, lr_0 = 7.1033e-04
Loss = 1.2410e-01, PNorm = 52.4686, GNorm = 0.8467, lr_0 = 7.0984e-04
Loss = 1.1591e-01, PNorm = 52.4853, GNorm = 0.8482, lr_0 = 7.0935e-04
Loss = 1.1067e-01, PNorm = 52.4930, GNorm = 1.5363, lr_0 = 7.0887e-04
Loss = 1.0915e-01, PNorm = 52.5058, GNorm = 0.6372, lr_0 = 7.0838e-04
Loss = 1.1469e-01, PNorm = 52.5158, GNorm = 0.6352, lr_0 = 7.0790e-04
Loss = 1.1524e-01, PNorm = 52.5252, GNorm = 1.0851, lr_0 = 7.0741e-04
Loss = 1.4618e-01, PNorm = 52.5411, GNorm = 0.9328, lr_0 = 7.0693e-04
Loss = 1.2153e-01, PNorm = 52.5668, GNorm = 0.9717, lr_0 = 7.0644e-04
Loss = 1.2777e-01, PNorm = 52.5864, GNorm = 0.5281, lr_0 = 7.0596e-04
Loss = 1.1286e-01, PNorm = 52.6027, GNorm = 0.7291, lr_0 = 7.0548e-04
Loss = 1.3119e-01, PNorm = 52.6211, GNorm = 1.2994, lr_0 = 7.0499e-04
Loss = 1.2707e-01, PNorm = 52.6382, GNorm = 1.4618, lr_0 = 7.0451e-04
Loss = 1.2376e-01, PNorm = 52.6526, GNorm = 1.7753, lr_0 = 7.0403e-04
Loss = 1.4558e-01, PNorm = 52.6732, GNorm = 0.8694, lr_0 = 7.0354e-04
Loss = 1.0920e-01, PNorm = 52.6900, GNorm = 0.5247, lr_0 = 7.0306e-04
Loss = 1.3016e-01, PNorm = 52.6999, GNorm = 1.2632, lr_0 = 7.0258e-04
Loss = 1.5262e-01, PNorm = 52.7203, GNorm = 2.1213, lr_0 = 7.0210e-04
Loss = 1.4089e-01, PNorm = 52.7386, GNorm = 2.3791, lr_0 = 7.0162e-04
Loss = 1.4296e-01, PNorm = 52.7628, GNorm = 0.6884, lr_0 = 7.0114e-04
Loss = 1.2028e-01, PNorm = 52.7791, GNorm = 1.4703, lr_0 = 7.0066e-04
Loss = 1.4802e-01, PNorm = 52.7972, GNorm = 1.5520, lr_0 = 7.0018e-04
Loss = 1.3462e-01, PNorm = 52.8224, GNorm = 2.0066, lr_0 = 6.9970e-04
Loss = 1.3952e-01, PNorm = 52.8459, GNorm = 1.2749, lr_0 = 6.9922e-04
Loss = 1.4697e-01, PNorm = 52.8724, GNorm = 1.7500, lr_0 = 6.9874e-04
Loss = 1.5898e-01, PNorm = 52.8995, GNorm = 1.1810, lr_0 = 6.9826e-04
Loss = 1.3629e-01, PNorm = 52.9289, GNorm = 0.9988, lr_0 = 6.9778e-04
Loss = 1.4761e-01, PNorm = 52.9557, GNorm = 0.9065, lr_0 = 6.9730e-04
Loss = 1.3538e-01, PNorm = 52.9694, GNorm = 0.6886, lr_0 = 6.9683e-04
Loss = 1.4331e-01, PNorm = 52.9865, GNorm = 0.8170, lr_0 = 6.9635e-04
Loss = 1.4833e-01, PNorm = 53.0065, GNorm = 1.5412, lr_0 = 6.9587e-04
Loss = 1.2726e-01, PNorm = 53.0244, GNorm = 1.9379, lr_0 = 6.9540e-04
Loss = 1.3758e-01, PNorm = 53.0437, GNorm = 1.2024, lr_0 = 6.9492e-04
Loss = 1.3067e-01, PNorm = 53.0650, GNorm = 0.6973, lr_0 = 6.9444e-04
Loss = 1.4886e-01, PNorm = 53.0824, GNorm = 0.6586, lr_0 = 6.9397e-04
Loss = 1.1840e-01, PNorm = 53.0954, GNorm = 0.5390, lr_0 = 6.9349e-04
Loss = 1.2012e-01, PNorm = 53.1099, GNorm = 1.3722, lr_0 = 6.9302e-04
Loss = 1.2163e-01, PNorm = 53.1196, GNorm = 0.8880, lr_0 = 6.9254e-04
Loss = 1.1012e-01, PNorm = 53.1389, GNorm = 1.3339, lr_0 = 6.9207e-04
Loss = 1.1958e-01, PNorm = 53.1547, GNorm = 0.6830, lr_0 = 6.9159e-04
Loss = 1.1694e-01, PNorm = 53.1684, GNorm = 0.7995, lr_0 = 6.9112e-04
Loss = 1.0946e-01, PNorm = 53.1825, GNorm = 0.8167, lr_0 = 6.9065e-04
Loss = 1.3755e-01, PNorm = 53.1974, GNorm = 1.4591, lr_0 = 6.9017e-04
Loss = 1.1604e-01, PNorm = 53.2155, GNorm = 0.7557, lr_0 = 6.8970e-04
Loss = 1.4218e-01, PNorm = 53.2344, GNorm = 0.5648, lr_0 = 6.8923e-04
Loss = 1.2619e-01, PNorm = 53.2551, GNorm = 1.0390, lr_0 = 6.8876e-04
Loss = 1.3656e-01, PNorm = 53.2759, GNorm = 0.5452, lr_0 = 6.8828e-04
Loss = 1.2736e-01, PNorm = 53.2891, GNorm = 0.9961, lr_0 = 6.8781e-04
Loss = 1.2522e-01, PNorm = 53.3072, GNorm = 0.6394, lr_0 = 6.8734e-04
Loss = 1.2596e-01, PNorm = 53.3229, GNorm = 1.2130, lr_0 = 6.8687e-04
Loss = 1.2054e-01, PNorm = 53.3409, GNorm = 0.9287, lr_0 = 6.8640e-04
Loss = 1.4872e-01, PNorm = 53.3551, GNorm = 2.9830, lr_0 = 6.8593e-04
Loss = 1.2185e-01, PNorm = 53.3700, GNorm = 1.4921, lr_0 = 6.8546e-04
Loss = 1.0814e-01, PNorm = 53.3890, GNorm = 0.7881, lr_0 = 6.8499e-04
Loss = 1.1375e-01, PNorm = 53.4124, GNorm = 1.4466, lr_0 = 6.8452e-04
Loss = 1.1698e-01, PNorm = 53.4282, GNorm = 0.5835, lr_0 = 6.8405e-04
Loss = 1.1208e-01, PNorm = 53.4366, GNorm = 1.0307, lr_0 = 6.8358e-04
Loss = 1.1614e-01, PNorm = 53.4500, GNorm = 1.0546, lr_0 = 6.8312e-04
Loss = 1.2237e-01, PNorm = 53.4692, GNorm = 0.7390, lr_0 = 6.8265e-04
Loss = 1.2286e-01, PNorm = 53.4867, GNorm = 0.7652, lr_0 = 6.8218e-04
Loss = 1.2241e-01, PNorm = 53.4979, GNorm = 0.7569, lr_0 = 6.8171e-04
Loss = 1.2173e-01, PNorm = 53.5139, GNorm = 0.8240, lr_0 = 6.8125e-04
Loss = 1.3054e-01, PNorm = 53.5351, GNorm = 0.7083, lr_0 = 6.8078e-04
Loss = 1.3958e-01, PNorm = 53.5574, GNorm = 1.0817, lr_0 = 6.8031e-04
Loss = 1.3442e-01, PNorm = 53.5719, GNorm = 0.9425, lr_0 = 6.7985e-04
Loss = 1.7777e-01, PNorm = 53.5914, GNorm = 2.4833, lr_0 = 6.7938e-04
Loss = 1.4617e-01, PNorm = 53.6145, GNorm = 1.5366, lr_0 = 6.7892e-04
Loss = 1.2608e-01, PNorm = 53.6384, GNorm = 0.9909, lr_0 = 6.7845e-04
Loss = 1.3720e-01, PNorm = 53.6547, GNorm = 1.6274, lr_0 = 6.7799e-04
Loss = 1.0388e-01, PNorm = 53.6685, GNorm = 1.1912, lr_0 = 6.7752e-04
Loss = 1.3825e-01, PNorm = 53.6792, GNorm = 1.7778, lr_0 = 6.7706e-04
Loss = 1.4007e-01, PNorm = 53.6990, GNorm = 1.4603, lr_0 = 6.7659e-04
Loss = 1.2137e-01, PNorm = 53.7204, GNorm = 0.7186, lr_0 = 6.7613e-04
Loss = 1.0826e-01, PNorm = 53.7408, GNorm = 0.9670, lr_0 = 6.7567e-04
Loss = 1.4767e-01, PNorm = 53.7570, GNorm = 0.8374, lr_0 = 6.7520e-04
Loss = 1.3555e-01, PNorm = 53.7778, GNorm = 0.7890, lr_0 = 6.7474e-04
Loss = 1.2065e-01, PNorm = 53.7942, GNorm = 0.8083, lr_0 = 6.7428e-04
Loss = 1.1386e-01, PNorm = 53.8151, GNorm = 0.5235, lr_0 = 6.7382e-04
Loss = 1.2232e-01, PNorm = 53.8278, GNorm = 0.6533, lr_0 = 6.7335e-04
Loss = 1.2485e-01, PNorm = 53.8362, GNorm = 1.3586, lr_0 = 6.7289e-04
Loss = 1.2347e-01, PNorm = 53.8477, GNorm = 0.9430, lr_0 = 6.7243e-04
Loss = 1.2262e-01, PNorm = 53.8667, GNorm = 1.5400, lr_0 = 6.7197e-04
Loss = 1.5576e-01, PNorm = 53.8904, GNorm = 1.0885, lr_0 = 6.7151e-04
Loss = 1.3435e-01, PNorm = 53.9153, GNorm = 1.0881, lr_0 = 6.7105e-04
Loss = 1.3591e-01, PNorm = 53.9338, GNorm = 1.2442, lr_0 = 6.7059e-04
Loss = 1.2323e-01, PNorm = 53.9527, GNorm = 0.7072, lr_0 = 6.7013e-04
Loss = 1.3377e-01, PNorm = 53.9709, GNorm = 0.8384, lr_0 = 6.6967e-04
Loss = 1.1873e-01, PNorm = 53.9850, GNorm = 0.6597, lr_0 = 6.6921e-04
Loss = 1.2480e-01, PNorm = 53.9963, GNorm = 1.2720, lr_0 = 6.6876e-04
Loss = 1.4205e-01, PNorm = 54.0115, GNorm = 0.7218, lr_0 = 6.6830e-04
Loss = 1.3235e-01, PNorm = 54.0309, GNorm = 0.9750, lr_0 = 6.6784e-04
Loss = 1.3079e-01, PNorm = 54.0526, GNorm = 1.1269, lr_0 = 6.6738e-04
Loss = 1.3628e-01, PNorm = 54.0749, GNorm = 0.6136, lr_0 = 6.6693e-04
Loss = 1.4296e-01, PNorm = 54.0895, GNorm = 1.2910, lr_0 = 6.6647e-04
Loss = 1.6900e-01, PNorm = 54.1086, GNorm = 0.6161, lr_0 = 6.6601e-04
Loss = 1.4574e-01, PNorm = 54.1319, GNorm = 0.8669, lr_0 = 6.6556e-04
Loss = 1.1494e-01, PNorm = 54.1549, GNorm = 0.7415, lr_0 = 6.6510e-04
Loss = 1.3966e-01, PNorm = 54.1678, GNorm = 1.0975, lr_0 = 6.6464e-04
Loss = 1.4318e-01, PNorm = 54.1768, GNorm = 0.7986, lr_0 = 6.6419e-04
Loss = 1.2276e-01, PNorm = 54.1889, GNorm = 0.5487, lr_0 = 6.6373e-04
Loss = 1.2950e-01, PNorm = 54.2094, GNorm = 0.5727, lr_0 = 6.6328e-04
Loss = 1.4638e-01, PNorm = 54.2352, GNorm = 0.9694, lr_0 = 6.6282e-04
Validation mae = 0.442473
Epoch 7
Loss = 1.1198e-01, PNorm = 54.2486, GNorm = 1.0110, lr_0 = 6.6237e-04
Loss = 1.2332e-01, PNorm = 54.2646, GNorm = 1.1582, lr_0 = 6.6192e-04
Loss = 1.1693e-01, PNorm = 54.2825, GNorm = 0.8188, lr_0 = 6.6146e-04
Loss = 1.1962e-01, PNorm = 54.3045, GNorm = 1.7630, lr_0 = 6.6101e-04
Loss = 1.3346e-01, PNorm = 54.3329, GNorm = 0.7531, lr_0 = 6.6056e-04
Loss = 1.1811e-01, PNorm = 54.3525, GNorm = 0.9649, lr_0 = 6.6011e-04
Loss = 1.1080e-01, PNorm = 54.3650, GNorm = 0.7618, lr_0 = 6.5965e-04
Loss = 1.1841e-01, PNorm = 54.3808, GNorm = 0.6487, lr_0 = 6.5920e-04
Loss = 1.1073e-01, PNorm = 54.3955, GNorm = 0.9930, lr_0 = 6.5875e-04
Loss = 1.1409e-01, PNorm = 54.4111, GNorm = 0.9332, lr_0 = 6.5830e-04
Loss = 1.2300e-01, PNorm = 54.4303, GNorm = 1.1383, lr_0 = 6.5785e-04
Loss = 9.3594e-02, PNorm = 54.4449, GNorm = 0.9732, lr_0 = 6.5740e-04
Loss = 1.0541e-01, PNorm = 54.4613, GNorm = 0.8430, lr_0 = 6.5695e-04
Loss = 1.2584e-01, PNorm = 54.4806, GNorm = 1.0189, lr_0 = 6.5650e-04
Loss = 1.3274e-01, PNorm = 54.4998, GNorm = 2.5206, lr_0 = 6.5605e-04
Loss = 1.3501e-01, PNorm = 54.5196, GNorm = 0.7308, lr_0 = 6.5560e-04
Loss = 9.4859e-02, PNorm = 54.5339, GNorm = 0.8870, lr_0 = 6.5515e-04
Loss = 1.2018e-01, PNorm = 54.5515, GNorm = 0.7308, lr_0 = 6.5470e-04
Loss = 1.0215e-01, PNorm = 54.5691, GNorm = 1.3928, lr_0 = 6.5425e-04
Loss = 1.2384e-01, PNorm = 54.5838, GNorm = 2.2090, lr_0 = 6.5380e-04
Loss = 1.1800e-01, PNorm = 54.6035, GNorm = 1.2273, lr_0 = 6.5335e-04
Loss = 1.0952e-01, PNorm = 54.6199, GNorm = 1.2416, lr_0 = 6.5291e-04
Loss = 1.0520e-01, PNorm = 54.6364, GNorm = 0.8778, lr_0 = 6.5246e-04
Loss = 1.3736e-01, PNorm = 54.6517, GNorm = 0.9707, lr_0 = 6.5201e-04
Loss = 1.0391e-01, PNorm = 54.6690, GNorm = 0.7775, lr_0 = 6.5157e-04
Loss = 1.0925e-01, PNorm = 54.6766, GNorm = 0.5484, lr_0 = 6.5112e-04
Loss = 1.2206e-01, PNorm = 54.6886, GNorm = 1.4458, lr_0 = 6.5067e-04
Loss = 1.3464e-01, PNorm = 54.7011, GNorm = 0.5553, lr_0 = 6.5023e-04
Loss = 1.1490e-01, PNorm = 54.7186, GNorm = 0.9881, lr_0 = 6.4978e-04
Loss = 1.1962e-01, PNorm = 54.7355, GNorm = 0.9002, lr_0 = 6.4934e-04
Loss = 1.2923e-01, PNorm = 54.7591, GNorm = 0.8696, lr_0 = 6.4889e-04
Loss = 1.3682e-01, PNorm = 54.7889, GNorm = 0.7693, lr_0 = 6.4845e-04
Loss = 1.0202e-01, PNorm = 54.8072, GNorm = 0.8721, lr_0 = 6.4800e-04
Loss = 1.3477e-01, PNorm = 54.8229, GNorm = 0.9138, lr_0 = 6.4756e-04
Loss = 1.2316e-01, PNorm = 54.8361, GNorm = 0.9896, lr_0 = 6.4712e-04
Loss = 1.2917e-01, PNorm = 54.8503, GNorm = 1.0352, lr_0 = 6.4667e-04
Loss = 1.0834e-01, PNorm = 54.8699, GNorm = 0.5907, lr_0 = 6.4623e-04
Loss = 1.2436e-01, PNorm = 54.8857, GNorm = 0.6601, lr_0 = 6.4579e-04
Loss = 1.0864e-01, PNorm = 54.9022, GNorm = 0.5522, lr_0 = 6.4534e-04
Loss = 1.1563e-01, PNorm = 54.9169, GNorm = 0.6018, lr_0 = 6.4490e-04
Loss = 1.1500e-01, PNorm = 54.9326, GNorm = 0.8848, lr_0 = 6.4446e-04
Loss = 1.1475e-01, PNorm = 54.9487, GNorm = 1.3569, lr_0 = 6.4402e-04
Loss = 1.1923e-01, PNorm = 54.9613, GNorm = 1.2408, lr_0 = 6.4358e-04
Loss = 1.1614e-01, PNorm = 54.9800, GNorm = 0.7158, lr_0 = 6.4314e-04
Loss = 1.2941e-01, PNorm = 54.9998, GNorm = 2.0485, lr_0 = 6.4270e-04
Loss = 1.1584e-01, PNorm = 55.0221, GNorm = 1.1521, lr_0 = 6.4226e-04
Loss = 1.3522e-01, PNorm = 55.0410, GNorm = 1.2768, lr_0 = 6.4182e-04
Loss = 1.4498e-01, PNorm = 55.0599, GNorm = 0.8298, lr_0 = 6.4138e-04
Loss = 1.3758e-01, PNorm = 55.0796, GNorm = 1.5029, lr_0 = 6.4094e-04
Loss = 1.2864e-01, PNorm = 55.0986, GNorm = 0.6414, lr_0 = 6.4050e-04
Loss = 1.2450e-01, PNorm = 55.1139, GNorm = 1.0171, lr_0 = 6.4006e-04
Loss = 1.2267e-01, PNorm = 55.1292, GNorm = 1.4715, lr_0 = 6.3962e-04
Loss = 1.3298e-01, PNorm = 55.1469, GNorm = 1.7573, lr_0 = 6.3918e-04
Loss = 1.1534e-01, PNorm = 55.1673, GNorm = 1.0069, lr_0 = 6.3874e-04
Loss = 1.2224e-01, PNorm = 55.1824, GNorm = 0.7979, lr_0 = 6.3831e-04
Loss = 1.0930e-01, PNorm = 55.1956, GNorm = 0.7145, lr_0 = 6.3787e-04
Loss = 1.1806e-01, PNorm = 55.2149, GNorm = 0.5891, lr_0 = 6.3743e-04
Loss = 1.3097e-01, PNorm = 55.2287, GNorm = 0.8996, lr_0 = 6.3700e-04
Loss = 1.1865e-01, PNorm = 55.2388, GNorm = 0.7871, lr_0 = 6.3656e-04
Loss = 1.2399e-01, PNorm = 55.2531, GNorm = 1.4251, lr_0 = 6.3612e-04
Loss = 1.1572e-01, PNorm = 55.2705, GNorm = 0.6994, lr_0 = 6.3569e-04
Loss = 1.2081e-01, PNorm = 55.2869, GNorm = 0.9986, lr_0 = 6.3525e-04
Loss = 1.2147e-01, PNorm = 55.3073, GNorm = 1.1510, lr_0 = 6.3482e-04
Loss = 1.1619e-01, PNorm = 55.3233, GNorm = 0.7280, lr_0 = 6.3438e-04
Loss = 1.1302e-01, PNorm = 55.3448, GNorm = 0.6013, lr_0 = 6.3395e-04
Loss = 1.2849e-01, PNorm = 55.3630, GNorm = 0.7324, lr_0 = 6.3351e-04
Loss = 1.1678e-01, PNorm = 55.3845, GNorm = 0.5646, lr_0 = 6.3308e-04
Loss = 1.3940e-01, PNorm = 55.3998, GNorm = 0.6122, lr_0 = 6.3265e-04
Loss = 1.3742e-01, PNorm = 55.4223, GNorm = 0.7600, lr_0 = 6.3221e-04
Loss = 1.1604e-01, PNorm = 55.4382, GNorm = 0.8180, lr_0 = 6.3178e-04
Loss = 1.0541e-01, PNorm = 55.4480, GNorm = 0.4935, lr_0 = 6.3135e-04
Loss = 1.2118e-01, PNorm = 55.4561, GNorm = 1.2467, lr_0 = 6.3091e-04
Loss = 1.2155e-01, PNorm = 55.4697, GNorm = 1.2386, lr_0 = 6.3048e-04
Loss = 1.2219e-01, PNorm = 55.4843, GNorm = 0.7030, lr_0 = 6.3005e-04
Loss = 9.7717e-02, PNorm = 55.5003, GNorm = 0.5140, lr_0 = 6.2962e-04
Loss = 1.2864e-01, PNorm = 55.5213, GNorm = 1.1289, lr_0 = 6.2919e-04
Loss = 1.2369e-01, PNorm = 55.5369, GNorm = 0.7908, lr_0 = 6.2876e-04
Loss = 1.0944e-01, PNorm = 55.5548, GNorm = 0.5089, lr_0 = 6.2833e-04
Loss = 1.0500e-01, PNorm = 55.5702, GNorm = 0.7504, lr_0 = 6.2789e-04
Loss = 1.1653e-01, PNorm = 55.5819, GNorm = 0.9002, lr_0 = 6.2746e-04
Loss = 1.2955e-01, PNorm = 55.5996, GNorm = 1.3482, lr_0 = 6.2703e-04
Loss = 1.3204e-01, PNorm = 55.6161, GNorm = 0.8277, lr_0 = 6.2661e-04
Loss = 1.0808e-01, PNorm = 55.6408, GNorm = 1.1959, lr_0 = 6.2618e-04
Loss = 1.1152e-01, PNorm = 55.6570, GNorm = 0.7066, lr_0 = 6.2575e-04
Loss = 1.4294e-01, PNorm = 55.6732, GNorm = 0.7466, lr_0 = 6.2532e-04
Loss = 1.1046e-01, PNorm = 55.6892, GNorm = 1.2817, lr_0 = 6.2489e-04
Loss = 1.1506e-01, PNorm = 55.7006, GNorm = 0.6929, lr_0 = 6.2446e-04
Loss = 1.1642e-01, PNorm = 55.7141, GNorm = 0.9190, lr_0 = 6.2403e-04
Loss = 1.1435e-01, PNorm = 55.7242, GNorm = 1.1435, lr_0 = 6.2361e-04
Loss = 1.0901e-01, PNorm = 55.7394, GNorm = 0.8651, lr_0 = 6.2318e-04
Loss = 1.0975e-01, PNorm = 55.7518, GNorm = 0.6596, lr_0 = 6.2275e-04
Loss = 1.2091e-01, PNorm = 55.7665, GNorm = 1.0383, lr_0 = 6.2233e-04
Loss = 1.1590e-01, PNorm = 55.7804, GNorm = 0.4803, lr_0 = 6.2190e-04
Loss = 1.2481e-01, PNorm = 55.7929, GNorm = 0.8759, lr_0 = 6.2147e-04
Loss = 1.1006e-01, PNorm = 55.8118, GNorm = 1.5055, lr_0 = 6.2105e-04
Loss = 1.0595e-01, PNorm = 55.8238, GNorm = 0.7697, lr_0 = 6.2062e-04
Loss = 1.1068e-01, PNorm = 55.8329, GNorm = 0.6702, lr_0 = 6.2020e-04
Loss = 1.1382e-01, PNorm = 55.8441, GNorm = 0.8677, lr_0 = 6.1977e-04
Loss = 1.0055e-01, PNorm = 55.8554, GNorm = 0.7164, lr_0 = 6.1935e-04
Loss = 1.1711e-01, PNorm = 55.8692, GNorm = 0.5109, lr_0 = 6.1892e-04
Loss = 1.1581e-01, PNorm = 55.8810, GNorm = 1.5650, lr_0 = 6.1850e-04
Loss = 1.1948e-01, PNorm = 55.8925, GNorm = 1.0056, lr_0 = 6.1808e-04
Loss = 1.2783e-01, PNorm = 55.9093, GNorm = 0.6824, lr_0 = 6.1765e-04
Loss = 1.0736e-01, PNorm = 55.9278, GNorm = 1.0315, lr_0 = 6.1723e-04
Loss = 1.1593e-01, PNorm = 55.9384, GNorm = 0.5920, lr_0 = 6.1681e-04
Loss = 1.2606e-01, PNorm = 55.9520, GNorm = 1.5827, lr_0 = 6.1638e-04
Loss = 1.1834e-01, PNorm = 55.9707, GNorm = 0.5173, lr_0 = 6.1596e-04
Loss = 1.1219e-01, PNorm = 55.9833, GNorm = 0.8043, lr_0 = 6.1554e-04
Loss = 1.1469e-01, PNorm = 55.9959, GNorm = 0.7047, lr_0 = 6.1512e-04
Loss = 1.0206e-01, PNorm = 56.0078, GNorm = 0.9710, lr_0 = 6.1470e-04
Loss = 1.3253e-01, PNorm = 56.0185, GNorm = 0.9224, lr_0 = 6.1428e-04
Loss = 1.1585e-01, PNorm = 56.0270, GNorm = 0.8361, lr_0 = 6.1385e-04
Loss = 1.3108e-01, PNorm = 56.0406, GNorm = 1.2605, lr_0 = 6.1343e-04
Loss = 1.1316e-01, PNorm = 56.0542, GNorm = 1.2489, lr_0 = 6.1301e-04
Loss = 1.1726e-01, PNorm = 56.0654, GNorm = 1.0266, lr_0 = 6.1259e-04
Loss = 1.2080e-01, PNorm = 56.0807, GNorm = 0.9854, lr_0 = 6.1217e-04
Loss = 1.2797e-01, PNorm = 56.0940, GNorm = 1.0722, lr_0 = 6.1175e-04
Loss = 1.0985e-01, PNorm = 56.1130, GNorm = 0.6746, lr_0 = 6.1134e-04
Loss = 1.3966e-01, PNorm = 56.1305, GNorm = 0.9366, lr_0 = 6.1092e-04
Loss = 1.3988e-01, PNorm = 56.1478, GNorm = 1.4230, lr_0 = 6.1050e-04
Validation mae = 0.443803
Epoch 8
Loss = 1.1382e-01, PNorm = 56.1658, GNorm = 1.1760, lr_0 = 6.1008e-04
Loss = 1.3061e-01, PNorm = 56.1866, GNorm = 1.9680, lr_0 = 6.0966e-04
Loss = 1.2546e-01, PNorm = 56.2054, GNorm = 1.8571, lr_0 = 6.0924e-04
Loss = 1.1824e-01, PNorm = 56.2233, GNorm = 1.0507, lr_0 = 6.0883e-04
Loss = 1.2329e-01, PNorm = 56.2414, GNorm = 1.3749, lr_0 = 6.0841e-04
Loss = 1.1273e-01, PNorm = 56.2639, GNorm = 0.7353, lr_0 = 6.0799e-04
Loss = 1.0485e-01, PNorm = 56.2821, GNorm = 0.6218, lr_0 = 6.0758e-04
Loss = 1.1935e-01, PNorm = 56.2953, GNorm = 0.6965, lr_0 = 6.0716e-04
Loss = 9.7863e-02, PNorm = 56.3081, GNorm = 0.6410, lr_0 = 6.0674e-04
Loss = 1.0527e-01, PNorm = 56.3243, GNorm = 0.7250, lr_0 = 6.0633e-04
Loss = 9.8797e-02, PNorm = 56.3427, GNorm = 1.5269, lr_0 = 6.0591e-04
Loss = 9.7999e-02, PNorm = 56.3525, GNorm = 1.3320, lr_0 = 6.0550e-04
Loss = 1.0479e-01, PNorm = 56.3722, GNorm = 1.4092, lr_0 = 6.0508e-04
Loss = 1.0045e-01, PNorm = 56.3899, GNorm = 0.5156, lr_0 = 6.0467e-04
Loss = 1.0226e-01, PNorm = 56.4075, GNorm = 0.9943, lr_0 = 6.0425e-04
Loss = 9.6455e-02, PNorm = 56.4211, GNorm = 0.8149, lr_0 = 6.0384e-04
Loss = 1.0131e-01, PNorm = 56.4328, GNorm = 1.5470, lr_0 = 6.0343e-04
Loss = 1.0218e-01, PNorm = 56.4487, GNorm = 0.5394, lr_0 = 6.0301e-04
Loss = 1.0931e-01, PNorm = 56.4590, GNorm = 2.2363, lr_0 = 6.0260e-04
Loss = 9.0468e-02, PNorm = 56.4676, GNorm = 0.6420, lr_0 = 6.0219e-04
Loss = 1.0116e-01, PNorm = 56.4787, GNorm = 0.6679, lr_0 = 6.0178e-04
Loss = 1.0760e-01, PNorm = 56.4959, GNorm = 0.6909, lr_0 = 6.0136e-04
Loss = 1.1505e-01, PNorm = 56.5070, GNorm = 0.8988, lr_0 = 6.0095e-04
Loss = 1.0365e-01, PNorm = 56.5180, GNorm = 0.9855, lr_0 = 6.0054e-04
Loss = 1.0268e-01, PNorm = 56.5350, GNorm = 0.9549, lr_0 = 6.0013e-04
Loss = 1.1360e-01, PNorm = 56.5523, GNorm = 1.7046, lr_0 = 5.9972e-04
Loss = 1.1706e-01, PNorm = 56.5724, GNorm = 1.0153, lr_0 = 5.9931e-04
Loss = 1.0355e-01, PNorm = 56.5828, GNorm = 0.6470, lr_0 = 5.9890e-04
Loss = 1.0646e-01, PNorm = 56.5911, GNorm = 1.2333, lr_0 = 5.9849e-04
Loss = 1.0271e-01, PNorm = 56.6072, GNorm = 0.6023, lr_0 = 5.9808e-04
Loss = 1.1419e-01, PNorm = 56.6267, GNorm = 0.8875, lr_0 = 5.9767e-04
Loss = 1.1097e-01, PNorm = 56.6450, GNorm = 0.8851, lr_0 = 5.9726e-04
Loss = 1.1233e-01, PNorm = 56.6640, GNorm = 1.3918, lr_0 = 5.9685e-04
Loss = 1.0883e-01, PNorm = 56.6810, GNorm = 0.4900, lr_0 = 5.9644e-04
Loss = 1.0256e-01, PNorm = 56.6967, GNorm = 0.7897, lr_0 = 5.9603e-04
Loss = 1.0529e-01, PNorm = 56.7139, GNorm = 0.6687, lr_0 = 5.9562e-04
Loss = 1.2860e-01, PNorm = 56.7248, GNorm = 1.5928, lr_0 = 5.9521e-04
Loss = 1.0304e-01, PNorm = 56.7421, GNorm = 0.9818, lr_0 = 5.9481e-04
Loss = 1.0269e-01, PNorm = 56.7613, GNorm = 0.5692, lr_0 = 5.9440e-04
Loss = 1.0811e-01, PNorm = 56.7681, GNorm = 1.0267, lr_0 = 5.9399e-04
Loss = 1.0002e-01, PNorm = 56.7814, GNorm = 0.6491, lr_0 = 5.9358e-04
Loss = 1.0818e-01, PNorm = 56.7927, GNorm = 0.6523, lr_0 = 5.9318e-04
Loss = 1.0846e-01, PNorm = 56.8063, GNorm = 0.7391, lr_0 = 5.9277e-04
Loss = 1.1669e-01, PNorm = 56.8159, GNorm = 0.9330, lr_0 = 5.9236e-04
Loss = 1.0727e-01, PNorm = 56.8326, GNorm = 0.6991, lr_0 = 5.9196e-04
Loss = 1.0540e-01, PNorm = 56.8464, GNorm = 0.5955, lr_0 = 5.9155e-04
Loss = 1.1240e-01, PNorm = 56.8611, GNorm = 0.7458, lr_0 = 5.9115e-04
Loss = 1.0796e-01, PNorm = 56.8726, GNorm = 0.9358, lr_0 = 5.9074e-04
Loss = 9.9974e-02, PNorm = 56.8902, GNorm = 0.8711, lr_0 = 5.9034e-04
Loss = 1.0630e-01, PNorm = 56.9050, GNorm = 0.6673, lr_0 = 5.8993e-04
Loss = 1.1143e-01, PNorm = 56.9196, GNorm = 1.2233, lr_0 = 5.8953e-04
Loss = 1.1093e-01, PNorm = 56.9317, GNorm = 0.6661, lr_0 = 5.8913e-04
Loss = 1.0102e-01, PNorm = 56.9443, GNorm = 0.6483, lr_0 = 5.8872e-04
Loss = 1.2633e-01, PNorm = 56.9558, GNorm = 1.7555, lr_0 = 5.8832e-04
Loss = 1.1820e-01, PNorm = 56.9720, GNorm = 0.8461, lr_0 = 5.8792e-04
Loss = 9.6968e-02, PNorm = 56.9854, GNorm = 0.5005, lr_0 = 5.8751e-04
Loss = 9.7237e-02, PNorm = 57.0011, GNorm = 1.0415, lr_0 = 5.8711e-04
Loss = 1.1454e-01, PNorm = 57.0214, GNorm = 0.6754, lr_0 = 5.8671e-04
Loss = 1.1222e-01, PNorm = 57.0396, GNorm = 0.6078, lr_0 = 5.8631e-04
Loss = 9.8812e-02, PNorm = 57.0550, GNorm = 0.8325, lr_0 = 5.8591e-04
Loss = 1.0818e-01, PNorm = 57.0685, GNorm = 0.7792, lr_0 = 5.8550e-04
Loss = 1.0025e-01, PNorm = 57.0801, GNorm = 0.7305, lr_0 = 5.8510e-04
Loss = 1.2217e-01, PNorm = 57.0916, GNorm = 0.8168, lr_0 = 5.8470e-04
Loss = 1.0706e-01, PNorm = 57.1058, GNorm = 0.5370, lr_0 = 5.8430e-04
Loss = 9.8976e-02, PNorm = 57.1243, GNorm = 1.8591, lr_0 = 5.8390e-04
Loss = 1.0817e-01, PNorm = 57.1424, GNorm = 1.3702, lr_0 = 5.8350e-04
Loss = 1.0599e-01, PNorm = 57.1562, GNorm = 1.4159, lr_0 = 5.8310e-04
Loss = 9.8175e-02, PNorm = 57.1660, GNorm = 0.4400, lr_0 = 5.8270e-04
Loss = 1.1529e-01, PNorm = 57.1776, GNorm = 0.6927, lr_0 = 5.8230e-04
Loss = 1.2254e-01, PNorm = 57.1902, GNorm = 0.8107, lr_0 = 5.8190e-04
Loss = 1.1742e-01, PNorm = 57.2068, GNorm = 0.7467, lr_0 = 5.8151e-04
Loss = 1.2269e-01, PNorm = 57.2213, GNorm = 0.7177, lr_0 = 5.8111e-04
Loss = 8.9863e-02, PNorm = 57.2306, GNorm = 0.4776, lr_0 = 5.8071e-04
Loss = 1.0279e-01, PNorm = 57.2479, GNorm = 0.5853, lr_0 = 5.8031e-04
Loss = 1.1461e-01, PNorm = 57.2646, GNorm = 0.9076, lr_0 = 5.7991e-04
Loss = 1.0272e-01, PNorm = 57.2795, GNorm = 0.7108, lr_0 = 5.7952e-04
Loss = 1.1760e-01, PNorm = 57.2939, GNorm = 1.3232, lr_0 = 5.7912e-04
Loss = 1.1352e-01, PNorm = 57.3055, GNorm = 0.8246, lr_0 = 5.7872e-04
Loss = 1.0709e-01, PNorm = 57.3229, GNorm = 0.7551, lr_0 = 5.7833e-04
Loss = 1.0476e-01, PNorm = 57.3320, GNorm = 0.6768, lr_0 = 5.7793e-04
Loss = 1.2641e-01, PNorm = 57.3456, GNorm = 0.8569, lr_0 = 5.7753e-04
Loss = 1.0406e-01, PNorm = 57.3629, GNorm = 0.6563, lr_0 = 5.7714e-04
Loss = 1.2504e-01, PNorm = 57.3757, GNorm = 0.8832, lr_0 = 5.7674e-04
Loss = 1.0738e-01, PNorm = 57.3859, GNorm = 0.6042, lr_0 = 5.7635e-04
Loss = 1.2532e-01, PNorm = 57.3981, GNorm = 0.7591, lr_0 = 5.7595e-04
Loss = 8.9987e-02, PNorm = 57.4121, GNorm = 0.9671, lr_0 = 5.7556e-04
Loss = 1.0466e-01, PNorm = 57.4257, GNorm = 1.7429, lr_0 = 5.7516e-04
Loss = 1.0566e-01, PNorm = 57.4406, GNorm = 1.1735, lr_0 = 5.7477e-04
Loss = 1.0674e-01, PNorm = 57.4528, GNorm = 1.1449, lr_0 = 5.7438e-04
Loss = 1.1199e-01, PNorm = 57.4659, GNorm = 0.7028, lr_0 = 5.7398e-04
Loss = 1.0130e-01, PNorm = 57.4796, GNorm = 0.7768, lr_0 = 5.7359e-04
Loss = 1.1498e-01, PNorm = 57.4942, GNorm = 0.8092, lr_0 = 5.7320e-04
Loss = 1.0561e-01, PNorm = 57.5104, GNorm = 1.0139, lr_0 = 5.7280e-04
Loss = 1.0499e-01, PNorm = 57.5294, GNorm = 0.7433, lr_0 = 5.7241e-04
Loss = 1.1264e-01, PNorm = 57.5413, GNorm = 0.6245, lr_0 = 5.7202e-04
Loss = 9.5720e-02, PNorm = 57.5526, GNorm = 0.7516, lr_0 = 5.7163e-04
Loss = 8.9448e-02, PNorm = 57.5581, GNorm = 0.6996, lr_0 = 5.7124e-04
Loss = 1.0285e-01, PNorm = 57.5649, GNorm = 0.6166, lr_0 = 5.7084e-04
Loss = 1.1770e-01, PNorm = 57.5757, GNorm = 0.7244, lr_0 = 5.7045e-04
Loss = 1.0143e-01, PNorm = 57.5930, GNorm = 1.0422, lr_0 = 5.7006e-04
Loss = 1.0294e-01, PNorm = 57.6074, GNorm = 0.9818, lr_0 = 5.6967e-04
Loss = 1.2201e-01, PNorm = 57.6191, GNorm = 0.6179, lr_0 = 5.6928e-04
Loss = 1.1314e-01, PNorm = 57.6400, GNorm = 0.6067, lr_0 = 5.6889e-04
Loss = 1.2063e-01, PNorm = 57.6555, GNorm = 0.7742, lr_0 = 5.6850e-04
Loss = 9.9041e-02, PNorm = 57.6615, GNorm = 1.0213, lr_0 = 5.6811e-04
Loss = 8.9805e-02, PNorm = 57.6653, GNorm = 1.4448, lr_0 = 5.6772e-04
Loss = 1.3280e-01, PNorm = 57.6776, GNorm = 1.6138, lr_0 = 5.6733e-04
Loss = 1.0579e-01, PNorm = 57.6933, GNorm = 0.6669, lr_0 = 5.6695e-04
Loss = 1.0698e-01, PNorm = 57.7068, GNorm = 0.7294, lr_0 = 5.6656e-04
Loss = 1.1245e-01, PNorm = 57.7108, GNorm = 0.5923, lr_0 = 5.6617e-04
Loss = 9.5786e-02, PNorm = 57.7162, GNorm = 1.1065, lr_0 = 5.6578e-04
Loss = 1.0558e-01, PNorm = 57.7311, GNorm = 1.3422, lr_0 = 5.6539e-04
Loss = 1.1443e-01, PNorm = 57.7504, GNorm = 1.6188, lr_0 = 5.6501e-04
Loss = 1.0213e-01, PNorm = 57.7679, GNorm = 0.7842, lr_0 = 5.6462e-04
Loss = 1.3375e-01, PNorm = 57.7794, GNorm = 1.2389, lr_0 = 5.6423e-04
Loss = 1.1029e-01, PNorm = 57.7937, GNorm = 0.7769, lr_0 = 5.6385e-04
Loss = 1.1481e-01, PNorm = 57.8063, GNorm = 1.0558, lr_0 = 5.6346e-04
Loss = 1.1402e-01, PNorm = 57.8203, GNorm = 0.4191, lr_0 = 5.6307e-04
Loss = 1.2017e-01, PNorm = 57.8335, GNorm = 0.5488, lr_0 = 5.6269e-04
Loss = 1.3100e-01, PNorm = 57.8574, GNorm = 0.7370, lr_0 = 5.6230e-04
Validation mae = 0.440844
Epoch 9
Loss = 1.1057e-01, PNorm = 57.8807, GNorm = 0.5802, lr_0 = 5.6192e-04
Loss = 1.1170e-01, PNorm = 57.9009, GNorm = 1.6084, lr_0 = 5.6153e-04
Loss = 1.1152e-01, PNorm = 57.9221, GNorm = 0.8970, lr_0 = 5.6115e-04
Loss = 9.9994e-02, PNorm = 57.9386, GNorm = 2.3539, lr_0 = 5.6076e-04
Loss = 9.4978e-02, PNorm = 57.9522, GNorm = 1.3568, lr_0 = 5.6038e-04
Loss = 1.0588e-01, PNorm = 57.9655, GNorm = 0.8675, lr_0 = 5.6000e-04
Loss = 9.6089e-02, PNorm = 57.9737, GNorm = 1.2406, lr_0 = 5.5961e-04
Loss = 9.3722e-02, PNorm = 57.9826, GNorm = 0.5485, lr_0 = 5.5923e-04
Loss = 9.3013e-02, PNorm = 57.9946, GNorm = 0.6301, lr_0 = 5.5885e-04
Loss = 1.0548e-01, PNorm = 58.0128, GNorm = 0.5937, lr_0 = 5.5846e-04
Loss = 1.0647e-01, PNorm = 58.0225, GNorm = 0.5937, lr_0 = 5.5808e-04
Loss = 1.0602e-01, PNorm = 58.0361, GNorm = 0.7659, lr_0 = 5.5770e-04
Loss = 9.6533e-02, PNorm = 58.0526, GNorm = 0.8352, lr_0 = 5.5732e-04
Loss = 8.3862e-02, PNorm = 58.0652, GNorm = 0.9459, lr_0 = 5.5693e-04
Loss = 8.5683e-02, PNorm = 58.0763, GNorm = 0.8662, lr_0 = 5.5655e-04
Loss = 1.0834e-01, PNorm = 58.0928, GNorm = 0.5733, lr_0 = 5.5617e-04
Loss = 1.1083e-01, PNorm = 58.1068, GNorm = 0.6970, lr_0 = 5.5579e-04
Loss = 1.0390e-01, PNorm = 58.1161, GNorm = 0.8715, lr_0 = 5.5541e-04
Loss = 1.1114e-01, PNorm = 58.1256, GNorm = 0.6411, lr_0 = 5.5503e-04
Loss = 9.7867e-02, PNorm = 58.1333, GNorm = 0.5411, lr_0 = 5.5465e-04
Loss = 9.4757e-02, PNorm = 58.1439, GNorm = 0.5243, lr_0 = 5.5427e-04
Loss = 8.6773e-02, PNorm = 58.1574, GNorm = 0.8482, lr_0 = 5.5389e-04
Loss = 9.5422e-02, PNorm = 58.1751, GNorm = 0.9580, lr_0 = 5.5351e-04
Loss = 1.0950e-01, PNorm = 58.1904, GNorm = 0.4902, lr_0 = 5.5313e-04
Loss = 9.4243e-02, PNorm = 58.2058, GNorm = 0.5698, lr_0 = 5.5275e-04
Loss = 1.0144e-01, PNorm = 58.2219, GNorm = 0.6019, lr_0 = 5.5237e-04
Loss = 9.7702e-02, PNorm = 58.2366, GNorm = 1.2568, lr_0 = 5.5199e-04
Loss = 9.0874e-02, PNorm = 58.2483, GNorm = 1.0417, lr_0 = 5.5162e-04
Loss = 1.0440e-01, PNorm = 58.2585, GNorm = 0.6983, lr_0 = 5.5124e-04
Loss = 1.0555e-01, PNorm = 58.2716, GNorm = 0.5827, lr_0 = 5.5086e-04
Loss = 1.0754e-01, PNorm = 58.2898, GNorm = 0.4735, lr_0 = 5.5048e-04
Loss = 1.0597e-01, PNorm = 58.3037, GNorm = 0.8131, lr_0 = 5.5011e-04
Loss = 1.0114e-01, PNorm = 58.3175, GNorm = 0.7889, lr_0 = 5.4973e-04
Loss = 1.0381e-01, PNorm = 58.3353, GNorm = 0.5983, lr_0 = 5.4935e-04
Loss = 1.0852e-01, PNorm = 58.3445, GNorm = 0.8107, lr_0 = 5.4898e-04
Loss = 9.6776e-02, PNorm = 58.3471, GNorm = 1.0007, lr_0 = 5.4860e-04
Loss = 1.0418e-01, PNorm = 58.3580, GNorm = 0.6154, lr_0 = 5.4822e-04
Loss = 1.0638e-01, PNorm = 58.3783, GNorm = 0.5610, lr_0 = 5.4785e-04
Loss = 1.1460e-01, PNorm = 58.3894, GNorm = 0.6737, lr_0 = 5.4747e-04
Loss = 9.6182e-02, PNorm = 58.4044, GNorm = 1.0810, lr_0 = 5.4710e-04
Loss = 1.0470e-01, PNorm = 58.4187, GNorm = 1.4102, lr_0 = 5.4672e-04
Loss = 9.9241e-02, PNorm = 58.4361, GNorm = 0.5790, lr_0 = 5.4635e-04
Loss = 1.0517e-01, PNorm = 58.4510, GNorm = 0.9169, lr_0 = 5.4597e-04
Loss = 1.0593e-01, PNorm = 58.4712, GNorm = 0.9958, lr_0 = 5.4560e-04
Loss = 1.0225e-01, PNorm = 58.4846, GNorm = 0.6693, lr_0 = 5.4523e-04
Loss = 1.1077e-01, PNorm = 58.4981, GNorm = 1.1027, lr_0 = 5.4485e-04
Loss = 1.0725e-01, PNorm = 58.5138, GNorm = 1.0007, lr_0 = 5.4448e-04
Loss = 9.6629e-02, PNorm = 58.5248, GNorm = 0.7104, lr_0 = 5.4411e-04
Loss = 1.1049e-01, PNorm = 58.5443, GNorm = 0.8948, lr_0 = 5.4373e-04
Loss = 1.1290e-01, PNorm = 58.5614, GNorm = 1.0895, lr_0 = 5.4336e-04
Loss = 1.0216e-01, PNorm = 58.5748, GNorm = 1.1025, lr_0 = 5.4299e-04
Loss = 9.4309e-02, PNorm = 58.5934, GNorm = 1.2282, lr_0 = 5.4262e-04
Loss = 1.0114e-01, PNorm = 58.6136, GNorm = 1.0376, lr_0 = 5.4225e-04
Loss = 9.6543e-02, PNorm = 58.6355, GNorm = 1.7831, lr_0 = 5.4187e-04
Loss = 1.0586e-01, PNorm = 58.6523, GNorm = 1.3101, lr_0 = 5.4150e-04
Loss = 1.0509e-01, PNorm = 58.6648, GNorm = 0.8482, lr_0 = 5.4113e-04
Loss = 1.1064e-01, PNorm = 58.6781, GNorm = 0.5827, lr_0 = 5.4076e-04
Loss = 1.0070e-01, PNorm = 58.6936, GNorm = 0.6499, lr_0 = 5.4039e-04
Loss = 9.5514e-02, PNorm = 58.7042, GNorm = 0.5284, lr_0 = 5.4002e-04
Loss = 9.5466e-02, PNorm = 58.7147, GNorm = 1.1221, lr_0 = 5.3965e-04
Loss = 9.4212e-02, PNorm = 58.7282, GNorm = 0.5056, lr_0 = 5.3928e-04
Loss = 9.8499e-02, PNorm = 58.7382, GNorm = 0.7299, lr_0 = 5.3891e-04
Loss = 1.0220e-01, PNorm = 58.7486, GNorm = 0.6665, lr_0 = 5.3854e-04
Loss = 1.0987e-01, PNorm = 58.7608, GNorm = 1.1784, lr_0 = 5.3817e-04
Loss = 1.1388e-01, PNorm = 58.7735, GNorm = 0.6605, lr_0 = 5.3781e-04
Loss = 9.4583e-02, PNorm = 58.7896, GNorm = 0.6538, lr_0 = 5.3744e-04
Loss = 9.4404e-02, PNorm = 58.8028, GNorm = 1.3986, lr_0 = 5.3707e-04
Loss = 9.7324e-02, PNorm = 58.8152, GNorm = 0.9546, lr_0 = 5.3670e-04
Loss = 1.0595e-01, PNorm = 58.8303, GNorm = 0.6643, lr_0 = 5.3633e-04
Loss = 1.1036e-01, PNorm = 58.8417, GNorm = 0.8074, lr_0 = 5.3597e-04
Loss = 1.0097e-01, PNorm = 58.8576, GNorm = 0.8128, lr_0 = 5.3560e-04
Loss = 9.9985e-02, PNorm = 58.8729, GNorm = 0.9911, lr_0 = 5.3523e-04
Loss = 1.3153e-01, PNorm = 58.8906, GNorm = 1.4374, lr_0 = 5.3486e-04
Loss = 1.1140e-01, PNorm = 58.9101, GNorm = 0.7617, lr_0 = 5.3450e-04
Loss = 9.5778e-02, PNorm = 58.9241, GNorm = 0.7506, lr_0 = 5.3413e-04
Loss = 8.8195e-02, PNorm = 58.9354, GNorm = 0.6848, lr_0 = 5.3377e-04
Loss = 9.9476e-02, PNorm = 58.9484, GNorm = 0.7412, lr_0 = 5.3340e-04
Loss = 1.0594e-01, PNorm = 58.9623, GNorm = 0.8139, lr_0 = 5.3304e-04
Loss = 1.1116e-01, PNorm = 58.9749, GNorm = 0.6278, lr_0 = 5.3267e-04
Loss = 1.1237e-01, PNorm = 58.9895, GNorm = 0.7185, lr_0 = 5.3231e-04
Loss = 1.0405e-01, PNorm = 59.0038, GNorm = 0.5167, lr_0 = 5.3194e-04
Loss = 8.9658e-02, PNorm = 59.0166, GNorm = 1.1335, lr_0 = 5.3158e-04
Loss = 9.4493e-02, PNorm = 59.0313, GNorm = 0.4908, lr_0 = 5.3121e-04
Loss = 1.1243e-01, PNorm = 59.0414, GNorm = 1.0444, lr_0 = 5.3085e-04
Loss = 9.1824e-02, PNorm = 59.0496, GNorm = 0.4676, lr_0 = 5.3048e-04
Loss = 9.9530e-02, PNorm = 59.0553, GNorm = 0.6900, lr_0 = 5.3012e-04
Loss = 1.1358e-01, PNorm = 59.0722, GNorm = 0.6576, lr_0 = 5.2976e-04
Loss = 1.0305e-01, PNorm = 59.0893, GNorm = 0.6946, lr_0 = 5.2939e-04
Loss = 1.0861e-01, PNorm = 59.1060, GNorm = 0.9623, lr_0 = 5.2903e-04
Loss = 9.3396e-02, PNorm = 59.1195, GNorm = 0.5186, lr_0 = 5.2867e-04
Loss = 1.0698e-01, PNorm = 59.1313, GNorm = 1.0060, lr_0 = 5.2831e-04
Loss = 1.0608e-01, PNorm = 59.1446, GNorm = 0.7966, lr_0 = 5.2795e-04
Loss = 1.0228e-01, PNorm = 59.1574, GNorm = 0.8466, lr_0 = 5.2758e-04
Loss = 1.2464e-01, PNorm = 59.1705, GNorm = 0.9674, lr_0 = 5.2722e-04
Loss = 1.0534e-01, PNorm = 59.1803, GNorm = 1.0303, lr_0 = 5.2686e-04
Loss = 8.5733e-02, PNorm = 59.1910, GNorm = 0.9570, lr_0 = 5.2650e-04
Loss = 1.1791e-01, PNorm = 59.2024, GNorm = 0.8294, lr_0 = 5.2614e-04
Loss = 1.0311e-01, PNorm = 59.2124, GNorm = 0.5756, lr_0 = 5.2578e-04
Loss = 9.3802e-02, PNorm = 59.2240, GNorm = 0.6297, lr_0 = 5.2542e-04
Loss = 9.2768e-02, PNorm = 59.2383, GNorm = 0.9094, lr_0 = 5.2506e-04
Loss = 1.1459e-01, PNorm = 59.2492, GNorm = 0.7890, lr_0 = 5.2470e-04
Loss = 1.0717e-01, PNorm = 59.2642, GNorm = 0.7544, lr_0 = 5.2434e-04
Loss = 9.8718e-02, PNorm = 59.2746, GNorm = 0.6071, lr_0 = 5.2398e-04
Loss = 8.9859e-02, PNorm = 59.2862, GNorm = 1.3300, lr_0 = 5.2362e-04
Loss = 1.1397e-01, PNorm = 59.2988, GNorm = 1.3218, lr_0 = 5.2326e-04
Loss = 9.2083e-02, PNorm = 59.3104, GNorm = 1.1946, lr_0 = 5.2290e-04
Loss = 1.1446e-01, PNorm = 59.3230, GNorm = 0.7971, lr_0 = 5.2255e-04
Loss = 1.0502e-01, PNorm = 59.3343, GNorm = 0.9844, lr_0 = 5.2219e-04
Loss = 9.4294e-02, PNorm = 59.3409, GNorm = 0.8436, lr_0 = 5.2183e-04
Loss = 1.0628e-01, PNorm = 59.3525, GNorm = 0.8202, lr_0 = 5.2147e-04
Loss = 1.0108e-01, PNorm = 59.3669, GNorm = 0.8750, lr_0 = 5.2112e-04
Loss = 9.7882e-02, PNorm = 59.3764, GNorm = 1.4235, lr_0 = 5.2076e-04
Loss = 9.3839e-02, PNorm = 59.3904, GNorm = 1.1230, lr_0 = 5.2040e-04
Loss = 1.0638e-01, PNorm = 59.4015, GNorm = 0.8283, lr_0 = 5.2005e-04
Loss = 1.0134e-01, PNorm = 59.4169, GNorm = 0.8053, lr_0 = 5.1969e-04
Loss = 1.0914e-01, PNorm = 59.4296, GNorm = 0.6138, lr_0 = 5.1933e-04
Loss = 9.7585e-02, PNorm = 59.4408, GNorm = 0.4660, lr_0 = 5.1898e-04
Loss = 1.0676e-01, PNorm = 59.4496, GNorm = 0.7482, lr_0 = 5.1862e-04
Loss = 9.7622e-02, PNorm = 59.4571, GNorm = 0.7376, lr_0 = 5.1827e-04
Loss = 8.7620e-02, PNorm = 59.4675, GNorm = 0.9532, lr_0 = 5.1791e-04
Validation mae = 0.404753
Epoch 10
Loss = 8.7851e-02, PNorm = 59.4803, GNorm = 0.4895, lr_0 = 5.1756e-04
Loss = 7.2827e-02, PNorm = 59.4913, GNorm = 0.4570, lr_0 = 5.1720e-04
Loss = 8.3726e-02, PNorm = 59.5057, GNorm = 1.0006, lr_0 = 5.1685e-04
Loss = 9.0394e-02, PNorm = 59.5164, GNorm = 0.7960, lr_0 = 5.1649e-04
Loss = 9.3831e-02, PNorm = 59.5273, GNorm = 0.4418, lr_0 = 5.1614e-04
Loss = 9.0795e-02, PNorm = 59.5434, GNorm = 0.8630, lr_0 = 5.1579e-04
Loss = 1.0633e-01, PNorm = 59.5584, GNorm = 0.6488, lr_0 = 5.1543e-04
Loss = 9.5668e-02, PNorm = 59.5722, GNorm = 0.4860, lr_0 = 5.1508e-04
Loss = 8.5414e-02, PNorm = 59.5866, GNorm = 0.5293, lr_0 = 5.1473e-04
Loss = 9.4590e-02, PNorm = 59.5980, GNorm = 0.5116, lr_0 = 5.1437e-04
Loss = 9.4345e-02, PNorm = 59.6074, GNorm = 0.7269, lr_0 = 5.1402e-04
Loss = 1.0225e-01, PNorm = 59.6251, GNorm = 0.6577, lr_0 = 5.1367e-04
Loss = 9.1782e-02, PNorm = 59.6401, GNorm = 0.6391, lr_0 = 5.1332e-04
Loss = 8.5343e-02, PNorm = 59.6559, GNorm = 1.0712, lr_0 = 5.1297e-04
Loss = 8.3581e-02, PNorm = 59.6657, GNorm = 1.3669, lr_0 = 5.1262e-04
Loss = 8.9267e-02, PNorm = 59.6787, GNorm = 0.6501, lr_0 = 5.1226e-04
Loss = 1.0303e-01, PNorm = 59.6945, GNorm = 1.5594, lr_0 = 5.1191e-04
Loss = 9.7686e-02, PNorm = 59.7095, GNorm = 0.6831, lr_0 = 5.1156e-04
Loss = 9.9609e-02, PNorm = 59.7237, GNorm = 0.5274, lr_0 = 5.1121e-04
Loss = 9.6346e-02, PNorm = 59.7380, GNorm = 0.8074, lr_0 = 5.1086e-04
Loss = 9.9548e-02, PNorm = 59.7504, GNorm = 1.3322, lr_0 = 5.1051e-04
Loss = 9.9333e-02, PNorm = 59.7643, GNorm = 0.8046, lr_0 = 5.1016e-04
Loss = 9.7132e-02, PNorm = 59.7802, GNorm = 1.1225, lr_0 = 5.0981e-04
Loss = 8.7748e-02, PNorm = 59.7929, GNorm = 0.8302, lr_0 = 5.0946e-04
Loss = 8.7615e-02, PNorm = 59.8008, GNorm = 0.5860, lr_0 = 5.0911e-04
Loss = 1.0598e-01, PNorm = 59.8074, GNorm = 0.8113, lr_0 = 5.0877e-04
Loss = 8.6082e-02, PNorm = 59.8180, GNorm = 0.6242, lr_0 = 5.0842e-04
Loss = 9.5219e-02, PNorm = 59.8276, GNorm = 0.6884, lr_0 = 5.0807e-04
Loss = 9.5393e-02, PNorm = 59.8390, GNorm = 0.8798, lr_0 = 5.0772e-04
Loss = 9.8394e-02, PNorm = 59.8486, GNorm = 0.9190, lr_0 = 5.0737e-04
Loss = 9.3618e-02, PNorm = 59.8552, GNorm = 1.3183, lr_0 = 5.0703e-04
Loss = 9.6334e-02, PNorm = 59.8633, GNorm = 1.5233, lr_0 = 5.0668e-04
Loss = 9.5273e-02, PNorm = 59.8790, GNorm = 1.1741, lr_0 = 5.0633e-04
Loss = 1.1598e-01, PNorm = 59.8952, GNorm = 1.1705, lr_0 = 5.0598e-04
Loss = 1.1421e-01, PNorm = 59.9122, GNorm = 0.9384, lr_0 = 5.0564e-04
Loss = 8.7730e-02, PNorm = 59.9230, GNorm = 0.5942, lr_0 = 5.0529e-04
Loss = 9.1825e-02, PNorm = 59.9344, GNorm = 0.5592, lr_0 = 5.0494e-04
Loss = 1.0780e-01, PNorm = 59.9577, GNorm = 0.4648, lr_0 = 5.0460e-04
Loss = 9.6548e-02, PNorm = 59.9753, GNorm = 0.6858, lr_0 = 5.0425e-04
Loss = 9.0890e-02, PNorm = 59.9876, GNorm = 0.6587, lr_0 = 5.0391e-04
Loss = 1.0352e-01, PNorm = 59.9977, GNorm = 0.8473, lr_0 = 5.0356e-04
Loss = 9.8070e-02, PNorm = 60.0118, GNorm = 1.0153, lr_0 = 5.0322e-04
Loss = 9.0797e-02, PNorm = 60.0235, GNorm = 0.6057, lr_0 = 5.0287e-04
Loss = 1.1491e-01, PNorm = 60.0410, GNorm = 1.0912, lr_0 = 5.0253e-04
Loss = 9.8607e-02, PNorm = 60.0525, GNorm = 0.8389, lr_0 = 5.0218e-04
Loss = 1.0892e-01, PNorm = 60.0634, GNorm = 0.6603, lr_0 = 5.0184e-04
Loss = 9.2945e-02, PNorm = 60.0778, GNorm = 1.1074, lr_0 = 5.0150e-04
Loss = 1.0331e-01, PNorm = 60.0885, GNorm = 0.5937, lr_0 = 5.0115e-04
Loss = 8.4553e-02, PNorm = 60.0983, GNorm = 0.4453, lr_0 = 5.0081e-04
Loss = 9.1790e-02, PNorm = 60.1051, GNorm = 0.8168, lr_0 = 5.0047e-04
Loss = 9.1917e-02, PNorm = 60.1165, GNorm = 0.5963, lr_0 = 5.0012e-04
Loss = 9.6671e-02, PNorm = 60.1287, GNorm = 0.7759, lr_0 = 4.9978e-04
Loss = 9.3949e-02, PNorm = 60.1398, GNorm = 0.6267, lr_0 = 4.9944e-04
Loss = 1.0096e-01, PNorm = 60.1534, GNorm = 0.8713, lr_0 = 4.9910e-04
Loss = 8.8572e-02, PNorm = 60.1642, GNorm = 0.6562, lr_0 = 4.9875e-04
Loss = 7.9430e-02, PNorm = 60.1728, GNorm = 0.5075, lr_0 = 4.9841e-04
Loss = 1.0393e-01, PNorm = 60.1806, GNorm = 0.5407, lr_0 = 4.9807e-04
Loss = 9.1214e-02, PNorm = 60.1991, GNorm = 0.8108, lr_0 = 4.9773e-04
Loss = 8.2621e-02, PNorm = 60.2132, GNorm = 0.7170, lr_0 = 4.9739e-04
Loss = 9.7154e-02, PNorm = 60.2279, GNorm = 0.7316, lr_0 = 4.9705e-04
Loss = 8.8495e-02, PNorm = 60.2366, GNorm = 0.6822, lr_0 = 4.9671e-04
Loss = 7.8472e-02, PNorm = 60.2456, GNorm = 1.0072, lr_0 = 4.9637e-04
Loss = 8.4921e-02, PNorm = 60.2576, GNorm = 1.0386, lr_0 = 4.9603e-04
Loss = 8.0818e-02, PNorm = 60.2654, GNorm = 0.6344, lr_0 = 4.9569e-04
Loss = 9.0546e-02, PNorm = 60.2778, GNorm = 0.4994, lr_0 = 4.9535e-04
Loss = 8.9859e-02, PNorm = 60.2851, GNorm = 0.4472, lr_0 = 4.9501e-04
Loss = 9.1688e-02, PNorm = 60.2915, GNorm = 0.6904, lr_0 = 4.9467e-04
Loss = 9.4780e-02, PNorm = 60.2989, GNorm = 0.4988, lr_0 = 4.9433e-04
Loss = 9.5326e-02, PNorm = 60.3081, GNorm = 0.6373, lr_0 = 4.9399e-04
Loss = 9.3331e-02, PNorm = 60.3167, GNorm = 0.4278, lr_0 = 4.9365e-04
Loss = 1.0258e-01, PNorm = 60.3282, GNorm = 1.0969, lr_0 = 4.9332e-04
Loss = 8.6876e-02, PNorm = 60.3454, GNorm = 0.7690, lr_0 = 4.9298e-04
Loss = 8.0618e-02, PNorm = 60.3568, GNorm = 0.5536, lr_0 = 4.9264e-04
Loss = 8.6438e-02, PNorm = 60.3671, GNorm = 0.7847, lr_0 = 4.9230e-04
Loss = 9.3433e-02, PNorm = 60.3774, GNorm = 1.3793, lr_0 = 4.9197e-04
Loss = 8.8835e-02, PNorm = 60.3891, GNorm = 0.6384, lr_0 = 4.9163e-04
Loss = 9.9831e-02, PNorm = 60.4066, GNorm = 0.6095, lr_0 = 4.9129e-04
Loss = 9.7655e-02, PNorm = 60.4198, GNorm = 0.4952, lr_0 = 4.9095e-04
Loss = 9.2233e-02, PNorm = 60.4275, GNorm = 0.5276, lr_0 = 4.9062e-04
Loss = 9.1991e-02, PNorm = 60.4319, GNorm = 1.2491, lr_0 = 4.9028e-04
Loss = 9.4289e-02, PNorm = 60.4447, GNorm = 0.6788, lr_0 = 4.8995e-04
Loss = 9.0327e-02, PNorm = 60.4584, GNorm = 0.8584, lr_0 = 4.8961e-04
Loss = 8.6525e-02, PNorm = 60.4737, GNorm = 0.5932, lr_0 = 4.8928e-04
Loss = 1.0003e-01, PNorm = 60.4848, GNorm = 0.7937, lr_0 = 4.8894e-04
Loss = 9.2627e-02, PNorm = 60.4971, GNorm = 0.9447, lr_0 = 4.8861e-04
Loss = 1.0729e-01, PNorm = 60.5098, GNorm = 0.6828, lr_0 = 4.8827e-04
Loss = 8.7622e-02, PNorm = 60.5197, GNorm = 1.5192, lr_0 = 4.8794e-04
Loss = 7.9866e-02, PNorm = 60.5302, GNorm = 0.6054, lr_0 = 4.8760e-04
Loss = 1.0239e-01, PNorm = 60.5440, GNorm = 0.6575, lr_0 = 4.8727e-04
Loss = 9.4835e-02, PNorm = 60.5599, GNorm = 0.4888, lr_0 = 4.8693e-04
Loss = 9.3395e-02, PNorm = 60.5744, GNorm = 0.6597, lr_0 = 4.8660e-04
Loss = 9.7833e-02, PNorm = 60.5838, GNorm = 0.4993, lr_0 = 4.8627e-04
Loss = 9.2584e-02, PNorm = 60.5957, GNorm = 0.5626, lr_0 = 4.8593e-04
Loss = 1.0073e-01, PNorm = 60.6056, GNorm = 0.8058, lr_0 = 4.8560e-04
Loss = 9.5748e-02, PNorm = 60.6136, GNorm = 1.4323, lr_0 = 4.8527e-04
Loss = 7.8905e-02, PNorm = 60.6270, GNorm = 0.5845, lr_0 = 4.8494e-04
Loss = 9.6787e-02, PNorm = 60.6407, GNorm = 1.1595, lr_0 = 4.8460e-04
Loss = 9.5686e-02, PNorm = 60.6503, GNorm = 1.1234, lr_0 = 4.8427e-04
Loss = 8.9133e-02, PNorm = 60.6620, GNorm = 0.8512, lr_0 = 4.8394e-04
Loss = 8.8639e-02, PNorm = 60.6728, GNorm = 1.0278, lr_0 = 4.8361e-04
Loss = 9.6170e-02, PNorm = 60.6781, GNorm = 1.0679, lr_0 = 4.8328e-04
Loss = 9.4766e-02, PNorm = 60.6910, GNorm = 0.7412, lr_0 = 4.8295e-04
Loss = 9.9810e-02, PNorm = 60.7058, GNorm = 1.2152, lr_0 = 4.8262e-04
Loss = 9.5379e-02, PNorm = 60.7146, GNorm = 0.9172, lr_0 = 4.8228e-04
Loss = 9.2785e-02, PNorm = 60.7234, GNorm = 0.6538, lr_0 = 4.8195e-04
Loss = 8.6893e-02, PNorm = 60.7366, GNorm = 1.1595, lr_0 = 4.8162e-04
Loss = 9.2400e-02, PNorm = 60.7460, GNorm = 0.4730, lr_0 = 4.8129e-04
Loss = 9.8185e-02, PNorm = 60.7556, GNorm = 1.2293, lr_0 = 4.8096e-04
Loss = 9.7889e-02, PNorm = 60.7651, GNorm = 0.6836, lr_0 = 4.8064e-04
Loss = 9.1421e-02, PNorm = 60.7775, GNorm = 0.8605, lr_0 = 4.8031e-04
Loss = 9.8076e-02, PNorm = 60.7958, GNorm = 0.5120, lr_0 = 4.7998e-04
Loss = 9.8334e-02, PNorm = 60.8147, GNorm = 1.0326, lr_0 = 4.7965e-04
Loss = 1.0102e-01, PNorm = 60.8272, GNorm = 0.9358, lr_0 = 4.7932e-04
Loss = 9.3980e-02, PNorm = 60.8416, GNorm = 0.8715, lr_0 = 4.7899e-04
Loss = 1.0556e-01, PNorm = 60.8631, GNorm = 1.0068, lr_0 = 4.7866e-04
Loss = 9.3801e-02, PNorm = 60.8764, GNorm = 0.6281, lr_0 = 4.7833e-04
Loss = 8.7723e-02, PNorm = 60.8826, GNorm = 0.6373, lr_0 = 4.7801e-04
Loss = 8.5163e-02, PNorm = 60.8935, GNorm = 0.7689, lr_0 = 4.7768e-04
Loss = 8.9433e-02, PNorm = 60.9037, GNorm = 1.2363, lr_0 = 4.7735e-04
Loss = 1.1447e-01, PNorm = 60.9142, GNorm = 1.5518, lr_0 = 4.7703e-04
Validation mae = 0.409053
Epoch 11
Loss = 9.4943e-02, PNorm = 60.9217, GNorm = 1.3637, lr_0 = 4.7670e-04
Loss = 8.2658e-02, PNorm = 60.9319, GNorm = 0.4894, lr_0 = 4.7637e-04
Loss = 8.3434e-02, PNorm = 60.9427, GNorm = 0.5285, lr_0 = 4.7605e-04
Loss = 8.0749e-02, PNorm = 60.9547, GNorm = 0.9011, lr_0 = 4.7572e-04
Loss = 8.6615e-02, PNorm = 60.9653, GNorm = 0.7576, lr_0 = 4.7539e-04
Loss = 7.9692e-02, PNorm = 60.9766, GNorm = 0.8013, lr_0 = 4.7507e-04
Loss = 7.3561e-02, PNorm = 60.9854, GNorm = 1.2102, lr_0 = 4.7474e-04
Loss = 8.6728e-02, PNorm = 60.9962, GNorm = 0.8768, lr_0 = 4.7442e-04
Loss = 7.9023e-02, PNorm = 61.0065, GNorm = 0.6581, lr_0 = 4.7409e-04
Loss = 7.2671e-02, PNorm = 61.0172, GNorm = 0.7397, lr_0 = 4.7377e-04
Loss = 9.0418e-02, PNorm = 61.0272, GNorm = 0.5917, lr_0 = 4.7344e-04
Loss = 8.7452e-02, PNorm = 61.0383, GNorm = 0.9843, lr_0 = 4.7312e-04
Loss = 8.0812e-02, PNorm = 61.0562, GNorm = 1.1654, lr_0 = 4.7279e-04
Loss = 9.8496e-02, PNorm = 61.0779, GNorm = 1.1614, lr_0 = 4.7247e-04
Loss = 8.8104e-02, PNorm = 61.0946, GNorm = 0.5435, lr_0 = 4.7215e-04
Loss = 8.0154e-02, PNorm = 61.1066, GNorm = 0.6122, lr_0 = 4.7182e-04
Loss = 9.5293e-02, PNorm = 61.1199, GNorm = 0.6438, lr_0 = 4.7150e-04
Loss = 8.3862e-02, PNorm = 61.1341, GNorm = 0.7521, lr_0 = 4.7118e-04
Loss = 8.7748e-02, PNorm = 61.1502, GNorm = 0.5647, lr_0 = 4.7085e-04
Loss = 9.4633e-02, PNorm = 61.1633, GNorm = 1.1404, lr_0 = 4.7053e-04
Loss = 9.5194e-02, PNorm = 61.1693, GNorm = 0.7754, lr_0 = 4.7021e-04
Loss = 7.1020e-02, PNorm = 61.1796, GNorm = 0.8655, lr_0 = 4.6989e-04
Loss = 8.7289e-02, PNorm = 61.1936, GNorm = 0.6246, lr_0 = 4.6957e-04
Loss = 7.2558e-02, PNorm = 61.2010, GNorm = 0.7246, lr_0 = 4.6924e-04
Loss = 8.4414e-02, PNorm = 61.2086, GNorm = 0.9487, lr_0 = 4.6892e-04
Loss = 9.5043e-02, PNorm = 61.2213, GNorm = 0.7944, lr_0 = 4.6860e-04
Loss = 8.6207e-02, PNorm = 61.2360, GNorm = 0.6451, lr_0 = 4.6828e-04
Loss = 8.9587e-02, PNorm = 61.2493, GNorm = 0.8088, lr_0 = 4.6796e-04
Loss = 7.2466e-02, PNorm = 61.2613, GNorm = 0.9086, lr_0 = 4.6764e-04
Loss = 8.4789e-02, PNorm = 61.2755, GNorm = 0.6006, lr_0 = 4.6732e-04
Loss = 9.7968e-02, PNorm = 61.2849, GNorm = 0.6576, lr_0 = 4.6700e-04
Loss = 7.7589e-02, PNorm = 61.2958, GNorm = 0.5931, lr_0 = 4.6668e-04
Loss = 9.5511e-02, PNorm = 61.3041, GNorm = 0.6500, lr_0 = 4.6636e-04
Loss = 8.9068e-02, PNorm = 61.3169, GNorm = 0.9956, lr_0 = 4.6604e-04
Loss = 8.6783e-02, PNorm = 61.3277, GNorm = 0.5624, lr_0 = 4.6572e-04
Loss = 8.6648e-02, PNorm = 61.3385, GNorm = 0.9729, lr_0 = 4.6540e-04
Loss = 8.7544e-02, PNorm = 61.3494, GNorm = 0.4809, lr_0 = 4.6508e-04
Loss = 8.7984e-02, PNorm = 61.3609, GNorm = 0.7651, lr_0 = 4.6476e-04
Loss = 9.1884e-02, PNorm = 61.3721, GNorm = 0.4787, lr_0 = 4.6445e-04
Loss = 9.4598e-02, PNorm = 61.3845, GNorm = 0.6160, lr_0 = 4.6413e-04
Loss = 8.6757e-02, PNorm = 61.3986, GNorm = 1.2167, lr_0 = 4.6381e-04
Loss = 8.4033e-02, PNorm = 61.4103, GNorm = 0.8804, lr_0 = 4.6349e-04
Loss = 1.0152e-01, PNorm = 61.4219, GNorm = 1.2983, lr_0 = 4.6317e-04
Loss = 8.6116e-02, PNorm = 61.4383, GNorm = 0.8482, lr_0 = 4.6286e-04
Loss = 1.0001e-01, PNorm = 61.4534, GNorm = 0.7153, lr_0 = 4.6254e-04
Loss = 7.9059e-02, PNorm = 61.4706, GNorm = 0.5736, lr_0 = 4.6222e-04
Loss = 8.0216e-02, PNorm = 61.4860, GNorm = 0.6508, lr_0 = 4.6191e-04
Loss = 8.4215e-02, PNorm = 61.4988, GNorm = 1.2225, lr_0 = 4.6159e-04
Loss = 8.1280e-02, PNorm = 61.5102, GNorm = 0.5735, lr_0 = 4.6127e-04
Loss = 9.1291e-02, PNorm = 61.5208, GNorm = 1.1348, lr_0 = 4.6096e-04
Loss = 8.7032e-02, PNorm = 61.5344, GNorm = 0.5369, lr_0 = 4.6064e-04
Loss = 7.8271e-02, PNorm = 61.5472, GNorm = 0.9982, lr_0 = 4.6033e-04
Loss = 9.8860e-02, PNorm = 61.5582, GNorm = 0.5609, lr_0 = 4.6001e-04
Loss = 8.6106e-02, PNorm = 61.5695, GNorm = 0.8325, lr_0 = 4.5970e-04
Loss = 9.6082e-02, PNorm = 61.5832, GNorm = 0.6925, lr_0 = 4.5938e-04
Loss = 9.0356e-02, PNorm = 61.5915, GNorm = 0.5344, lr_0 = 4.5907e-04
Loss = 9.1027e-02, PNorm = 61.5994, GNorm = 0.9402, lr_0 = 4.5875e-04
Loss = 7.9952e-02, PNorm = 61.6084, GNorm = 0.4983, lr_0 = 4.5844e-04
Loss = 8.2301e-02, PNorm = 61.6158, GNorm = 0.7698, lr_0 = 4.5812e-04
Loss = 9.0198e-02, PNorm = 61.6271, GNorm = 0.6965, lr_0 = 4.5781e-04
Loss = 7.8298e-02, PNorm = 61.6344, GNorm = 0.6347, lr_0 = 4.5750e-04
Loss = 9.1906e-02, PNorm = 61.6396, GNorm = 0.7684, lr_0 = 4.5718e-04
Loss = 9.6308e-02, PNorm = 61.6503, GNorm = 0.7237, lr_0 = 4.5687e-04
Loss = 8.0194e-02, PNorm = 61.6616, GNorm = 0.3971, lr_0 = 4.5656e-04
Loss = 9.5068e-02, PNorm = 61.6748, GNorm = 0.5514, lr_0 = 4.5624e-04
Loss = 8.3776e-02, PNorm = 61.6897, GNorm = 0.9123, lr_0 = 4.5593e-04
Loss = 9.8177e-02, PNorm = 61.7000, GNorm = 0.7893, lr_0 = 4.5562e-04
Loss = 8.3284e-02, PNorm = 61.7096, GNorm = 0.8031, lr_0 = 4.5531e-04
Loss = 8.6333e-02, PNorm = 61.7215, GNorm = 0.6722, lr_0 = 4.5499e-04
Loss = 9.3721e-02, PNorm = 61.7323, GNorm = 0.8109, lr_0 = 4.5468e-04
Loss = 7.6386e-02, PNorm = 61.7425, GNorm = 0.5308, lr_0 = 4.5437e-04
Loss = 9.2660e-02, PNorm = 61.7491, GNorm = 0.6850, lr_0 = 4.5406e-04
Loss = 8.4230e-02, PNorm = 61.7596, GNorm = 0.7240, lr_0 = 4.5375e-04
Loss = 8.4506e-02, PNorm = 61.7701, GNorm = 0.7646, lr_0 = 4.5344e-04
Loss = 7.4538e-02, PNorm = 61.7803, GNorm = 0.8629, lr_0 = 4.5313e-04
Loss = 8.4708e-02, PNorm = 61.7879, GNorm = 0.7740, lr_0 = 4.5282e-04
Loss = 9.5552e-02, PNorm = 61.7967, GNorm = 1.0519, lr_0 = 4.5251e-04
Loss = 8.0705e-02, PNorm = 61.8047, GNorm = 0.5398, lr_0 = 4.5220e-04
Loss = 7.5325e-02, PNorm = 61.8158, GNorm = 0.5219, lr_0 = 4.5189e-04
Loss = 8.8961e-02, PNorm = 61.8275, GNorm = 0.4261, lr_0 = 4.5158e-04
Loss = 7.8155e-02, PNorm = 61.8399, GNorm = 0.6174, lr_0 = 4.5127e-04
Loss = 9.3876e-02, PNorm = 61.8546, GNorm = 0.9956, lr_0 = 4.5096e-04
Loss = 9.3877e-02, PNorm = 61.8580, GNorm = 0.9756, lr_0 = 4.5065e-04
Loss = 9.3343e-02, PNorm = 61.8649, GNorm = 0.9312, lr_0 = 4.5034e-04
Loss = 8.2058e-02, PNorm = 61.8735, GNorm = 0.5143, lr_0 = 4.5003e-04
Loss = 9.1116e-02, PNorm = 61.8809, GNorm = 0.5449, lr_0 = 4.4972e-04
Loss = 7.8129e-02, PNorm = 61.8888, GNorm = 0.4594, lr_0 = 4.4942e-04
Loss = 8.1367e-02, PNorm = 61.8983, GNorm = 0.5609, lr_0 = 4.4911e-04
Loss = 7.7711e-02, PNorm = 61.9056, GNorm = 0.5433, lr_0 = 4.4880e-04
Loss = 9.4521e-02, PNorm = 61.9130, GNorm = 1.1835, lr_0 = 4.4849e-04
Loss = 8.4569e-02, PNorm = 61.9227, GNorm = 1.0893, lr_0 = 4.4819e-04
Loss = 9.7792e-02, PNorm = 61.9318, GNorm = 1.2006, lr_0 = 4.4788e-04
Loss = 8.8840e-02, PNorm = 61.9431, GNorm = 0.9634, lr_0 = 4.4757e-04
Loss = 9.7648e-02, PNorm = 61.9516, GNorm = 0.7676, lr_0 = 4.4727e-04
Loss = 8.1786e-02, PNorm = 61.9591, GNorm = 0.6288, lr_0 = 4.4696e-04
Loss = 9.8157e-02, PNorm = 61.9688, GNorm = 0.7926, lr_0 = 4.4665e-04
Loss = 8.8869e-02, PNorm = 61.9838, GNorm = 1.0258, lr_0 = 4.4635e-04
Loss = 9.1003e-02, PNorm = 61.9965, GNorm = 0.4971, lr_0 = 4.4604e-04
Loss = 8.5930e-02, PNorm = 62.0064, GNorm = 0.9436, lr_0 = 4.4574e-04
Loss = 8.8101e-02, PNorm = 62.0132, GNorm = 0.5067, lr_0 = 4.4543e-04
Loss = 8.0180e-02, PNorm = 62.0201, GNorm = 0.6451, lr_0 = 4.4513e-04
Loss = 7.6047e-02, PNorm = 62.0282, GNorm = 0.6551, lr_0 = 4.4482e-04
Loss = 9.2517e-02, PNorm = 62.0392, GNorm = 0.7018, lr_0 = 4.4452e-04
Loss = 9.0246e-02, PNorm = 62.0525, GNorm = 0.6675, lr_0 = 4.4421e-04
Loss = 9.1112e-02, PNorm = 62.0671, GNorm = 0.7008, lr_0 = 4.4391e-04
Loss = 8.7736e-02, PNorm = 62.0804, GNorm = 0.6990, lr_0 = 4.4360e-04
Loss = 8.8846e-02, PNorm = 62.0950, GNorm = 0.6230, lr_0 = 4.4330e-04
Loss = 1.0154e-01, PNorm = 62.1064, GNorm = 1.0893, lr_0 = 4.4299e-04
Loss = 9.9741e-02, PNorm = 62.1150, GNorm = 1.2030, lr_0 = 4.4269e-04
Loss = 8.8242e-02, PNorm = 62.1289, GNorm = 0.9665, lr_0 = 4.4239e-04
Loss = 9.1802e-02, PNorm = 62.1407, GNorm = 0.7647, lr_0 = 4.4209e-04
Loss = 8.1747e-02, PNorm = 62.1497, GNorm = 0.8237, lr_0 = 4.4178e-04
Loss = 9.6485e-02, PNorm = 62.1617, GNorm = 1.0345, lr_0 = 4.4148e-04
Loss = 9.4144e-02, PNorm = 62.1746, GNorm = 0.8818, lr_0 = 4.4118e-04
Loss = 8.1486e-02, PNorm = 62.1876, GNorm = 0.6025, lr_0 = 4.4088e-04
Loss = 9.5089e-02, PNorm = 62.1988, GNorm = 0.9166, lr_0 = 4.4057e-04
Loss = 9.0959e-02, PNorm = 62.2080, GNorm = 0.4967, lr_0 = 4.4027e-04
Loss = 9.0040e-02, PNorm = 62.2178, GNorm = 0.6557, lr_0 = 4.3997e-04
Loss = 8.8085e-02, PNorm = 62.2245, GNorm = 1.1030, lr_0 = 4.3967e-04
Loss = 9.6227e-02, PNorm = 62.2402, GNorm = 0.8818, lr_0 = 4.3937e-04
Validation mae = 0.405637
Epoch 12
Loss = 7.9945e-02, PNorm = 62.2528, GNorm = 0.4727, lr_0 = 4.3907e-04
Loss = 8.0840e-02, PNorm = 62.2641, GNorm = 1.1659, lr_0 = 4.3877e-04
Loss = 7.5263e-02, PNorm = 62.2758, GNorm = 0.4925, lr_0 = 4.3846e-04
Loss = 8.3666e-02, PNorm = 62.2920, GNorm = 0.5271, lr_0 = 4.3816e-04
Loss = 7.5117e-02, PNorm = 62.3053, GNorm = 1.3545, lr_0 = 4.3786e-04
Loss = 9.2968e-02, PNorm = 62.3147, GNorm = 1.0145, lr_0 = 4.3756e-04
Loss = 8.0174e-02, PNorm = 62.3268, GNorm = 0.5750, lr_0 = 4.3726e-04
Loss = 7.7186e-02, PNorm = 62.3385, GNorm = 0.5301, lr_0 = 4.3696e-04
Loss = 7.2969e-02, PNorm = 62.3486, GNorm = 0.7086, lr_0 = 4.3667e-04
Loss = 7.0120e-02, PNorm = 62.3565, GNorm = 0.4955, lr_0 = 4.3637e-04
Loss = 6.9261e-02, PNorm = 62.3646, GNorm = 0.6829, lr_0 = 4.3607e-04
Loss = 7.5534e-02, PNorm = 62.3671, GNorm = 0.4419, lr_0 = 4.3577e-04
Loss = 8.0383e-02, PNorm = 62.3732, GNorm = 0.4375, lr_0 = 4.3547e-04
Loss = 7.1040e-02, PNorm = 62.3847, GNorm = 0.8294, lr_0 = 4.3517e-04
Loss = 7.5965e-02, PNorm = 62.3966, GNorm = 0.5139, lr_0 = 4.3487e-04
Loss = 7.3321e-02, PNorm = 62.4041, GNorm = 0.5648, lr_0 = 4.3458e-04
Loss = 7.4123e-02, PNorm = 62.4134, GNorm = 0.6387, lr_0 = 4.3428e-04
Loss = 7.5516e-02, PNorm = 62.4211, GNorm = 0.4642, lr_0 = 4.3398e-04
Loss = 7.8272e-02, PNorm = 62.4322, GNorm = 0.6299, lr_0 = 4.3368e-04
Loss = 8.4895e-02, PNorm = 62.4366, GNorm = 0.5592, lr_0 = 4.3339e-04
Loss = 7.9822e-02, PNorm = 62.4467, GNorm = 0.6214, lr_0 = 4.3309e-04
Loss = 8.1559e-02, PNorm = 62.4581, GNorm = 0.4880, lr_0 = 4.3279e-04
Loss = 8.7243e-02, PNorm = 62.4695, GNorm = 0.6941, lr_0 = 4.3250e-04
Loss = 8.8128e-02, PNorm = 62.4827, GNorm = 0.9572, lr_0 = 4.3220e-04
Loss = 7.7655e-02, PNorm = 62.4940, GNorm = 0.4643, lr_0 = 4.3190e-04
Loss = 8.2309e-02, PNorm = 62.5045, GNorm = 0.4221, lr_0 = 4.3161e-04
Loss = 7.4092e-02, PNorm = 62.5165, GNorm = 0.4657, lr_0 = 4.3131e-04
Loss = 7.9884e-02, PNorm = 62.5287, GNorm = 0.8007, lr_0 = 4.3102e-04
Loss = 7.5837e-02, PNorm = 62.5392, GNorm = 0.5097, lr_0 = 4.3072e-04
Loss = 7.1099e-02, PNorm = 62.5478, GNorm = 0.5444, lr_0 = 4.3043e-04
Loss = 8.0711e-02, PNorm = 62.5576, GNorm = 0.6035, lr_0 = 4.3013e-04
Loss = 7.7546e-02, PNorm = 62.5704, GNorm = 0.6457, lr_0 = 4.2984e-04
Loss = 7.8627e-02, PNorm = 62.5824, GNorm = 0.8897, lr_0 = 4.2954e-04
Loss = 8.1429e-02, PNorm = 62.5915, GNorm = 0.7984, lr_0 = 4.2925e-04
Loss = 9.5377e-02, PNorm = 62.6015, GNorm = 0.7524, lr_0 = 4.2895e-04
Loss = 8.3226e-02, PNorm = 62.6098, GNorm = 0.6016, lr_0 = 4.2866e-04
Loss = 8.2786e-02, PNorm = 62.6203, GNorm = 1.0126, lr_0 = 4.2837e-04
Loss = 8.5773e-02, PNorm = 62.6318, GNorm = 1.3345, lr_0 = 4.2807e-04
Loss = 8.5341e-02, PNorm = 62.6438, GNorm = 0.7796, lr_0 = 4.2778e-04
Loss = 8.3616e-02, PNorm = 62.6521, GNorm = 0.9498, lr_0 = 4.2749e-04
Loss = 8.5981e-02, PNorm = 62.6630, GNorm = 0.7726, lr_0 = 4.2719e-04
Loss = 7.4482e-02, PNorm = 62.6764, GNorm = 0.4916, lr_0 = 4.2690e-04
Loss = 7.6989e-02, PNorm = 62.6889, GNorm = 0.6274, lr_0 = 4.2661e-04
Loss = 9.1924e-02, PNorm = 62.7006, GNorm = 0.9595, lr_0 = 4.2632e-04
Loss = 8.6605e-02, PNorm = 62.7154, GNorm = 0.7770, lr_0 = 4.2602e-04
Loss = 7.6128e-02, PNorm = 62.7283, GNorm = 0.7237, lr_0 = 4.2573e-04
Loss = 8.9156e-02, PNorm = 62.7400, GNorm = 1.0928, lr_0 = 4.2544e-04
Loss = 7.6654e-02, PNorm = 62.7476, GNorm = 0.7445, lr_0 = 4.2515e-04
Loss = 6.5916e-02, PNorm = 62.7550, GNorm = 0.4016, lr_0 = 4.2486e-04
Loss = 8.4293e-02, PNorm = 62.7630, GNorm = 0.8129, lr_0 = 4.2457e-04
Loss = 8.7860e-02, PNorm = 62.7717, GNorm = 0.5678, lr_0 = 4.2428e-04
Loss = 7.9577e-02, PNorm = 62.7819, GNorm = 1.0918, lr_0 = 4.2399e-04
Loss = 7.6292e-02, PNorm = 62.7884, GNorm = 0.5593, lr_0 = 4.2370e-04
Loss = 9.3330e-02, PNorm = 62.7949, GNorm = 0.3917, lr_0 = 4.2340e-04
Loss = 8.6903e-02, PNorm = 62.8032, GNorm = 0.9786, lr_0 = 4.2311e-04
Loss = 8.6296e-02, PNorm = 62.8157, GNorm = 0.7529, lr_0 = 4.2283e-04
Loss = 7.3898e-02, PNorm = 62.8269, GNorm = 0.8020, lr_0 = 4.2254e-04
Loss = 7.6261e-02, PNorm = 62.8362, GNorm = 0.4943, lr_0 = 4.2225e-04
Loss = 7.5011e-02, PNorm = 62.8444, GNorm = 0.5014, lr_0 = 4.2196e-04
Loss = 8.7001e-02, PNorm = 62.8485, GNorm = 0.8461, lr_0 = 4.2167e-04
Loss = 8.3973e-02, PNorm = 62.8546, GNorm = 0.5629, lr_0 = 4.2138e-04
Loss = 8.0358e-02, PNorm = 62.8603, GNorm = 0.5426, lr_0 = 4.2109e-04
Loss = 8.1013e-02, PNorm = 62.8715, GNorm = 0.5186, lr_0 = 4.2080e-04
Loss = 8.9515e-02, PNorm = 62.8817, GNorm = 0.5919, lr_0 = 4.2051e-04
Loss = 8.2067e-02, PNorm = 62.8901, GNorm = 0.7868, lr_0 = 4.2023e-04
Loss = 7.6235e-02, PNorm = 62.8991, GNorm = 0.5266, lr_0 = 4.1994e-04
Loss = 7.9424e-02, PNorm = 62.9096, GNorm = 0.7287, lr_0 = 4.1965e-04
Loss = 8.1915e-02, PNorm = 62.9223, GNorm = 0.5612, lr_0 = 4.1936e-04
Loss = 6.9424e-02, PNorm = 62.9321, GNorm = 0.6001, lr_0 = 4.1907e-04
Loss = 7.7090e-02, PNorm = 62.9413, GNorm = 0.9203, lr_0 = 4.1879e-04
Loss = 8.2578e-02, PNorm = 62.9492, GNorm = 0.6751, lr_0 = 4.1850e-04
Loss = 7.4659e-02, PNorm = 62.9583, GNorm = 0.8372, lr_0 = 4.1821e-04
Loss = 7.8531e-02, PNorm = 62.9685, GNorm = 0.4768, lr_0 = 4.1793e-04
Loss = 8.0356e-02, PNorm = 62.9757, GNorm = 1.3448, lr_0 = 4.1764e-04
Loss = 7.3040e-02, PNorm = 62.9837, GNorm = 0.8989, lr_0 = 4.1736e-04
Loss = 9.4392e-02, PNorm = 62.9965, GNorm = 0.6845, lr_0 = 4.1707e-04
Loss = 7.9241e-02, PNorm = 63.0107, GNorm = 0.7399, lr_0 = 4.1678e-04
Loss = 8.8438e-02, PNorm = 63.0245, GNorm = 0.6592, lr_0 = 4.1650e-04
Loss = 7.0064e-02, PNorm = 63.0340, GNorm = 0.7464, lr_0 = 4.1621e-04
Loss = 8.1720e-02, PNorm = 63.0427, GNorm = 0.5771, lr_0 = 4.1593e-04
Loss = 8.3373e-02, PNorm = 63.0493, GNorm = 0.6927, lr_0 = 4.1564e-04
Loss = 7.4385e-02, PNorm = 63.0576, GNorm = 1.1685, lr_0 = 4.1536e-04
Loss = 8.7487e-02, PNorm = 63.0653, GNorm = 0.8915, lr_0 = 4.1507e-04
Loss = 7.9445e-02, PNorm = 63.0748, GNorm = 0.5363, lr_0 = 4.1479e-04
Loss = 8.3011e-02, PNorm = 63.0837, GNorm = 0.9206, lr_0 = 4.1450e-04
Loss = 8.0548e-02, PNorm = 63.0918, GNorm = 0.6323, lr_0 = 4.1422e-04
Loss = 8.7590e-02, PNorm = 63.1017, GNorm = 0.6902, lr_0 = 4.1394e-04
Loss = 7.9931e-02, PNorm = 63.1086, GNorm = 0.6567, lr_0 = 4.1365e-04
Loss = 8.7101e-02, PNorm = 63.1160, GNorm = 0.7405, lr_0 = 4.1337e-04
Loss = 7.9531e-02, PNorm = 63.1236, GNorm = 0.5895, lr_0 = 4.1309e-04
Loss = 7.8484e-02, PNorm = 63.1326, GNorm = 0.6444, lr_0 = 4.1280e-04
Loss = 9.1083e-02, PNorm = 63.1431, GNorm = 0.7161, lr_0 = 4.1252e-04
Loss = 8.6975e-02, PNorm = 63.1533, GNorm = 0.6721, lr_0 = 4.1224e-04
Loss = 8.1189e-02, PNorm = 63.1623, GNorm = 0.7474, lr_0 = 4.1196e-04
Loss = 1.0015e-01, PNorm = 63.1709, GNorm = 1.1320, lr_0 = 4.1167e-04
Loss = 9.0014e-02, PNorm = 63.1763, GNorm = 0.5587, lr_0 = 4.1139e-04
Loss = 8.7808e-02, PNorm = 63.1870, GNorm = 0.7018, lr_0 = 4.1111e-04
Loss = 9.7441e-02, PNorm = 63.1951, GNorm = 0.8650, lr_0 = 4.1083e-04
Loss = 8.4231e-02, PNorm = 63.2054, GNorm = 0.7711, lr_0 = 4.1055e-04
Loss = 8.1058e-02, PNorm = 63.2155, GNorm = 0.7707, lr_0 = 4.1027e-04
Loss = 8.4450e-02, PNorm = 63.2255, GNorm = 0.8778, lr_0 = 4.0998e-04
Loss = 9.6619e-02, PNorm = 63.2328, GNorm = 0.7529, lr_0 = 4.0970e-04
Loss = 8.3076e-02, PNorm = 63.2426, GNorm = 0.8230, lr_0 = 4.0942e-04
Loss = 8.6054e-02, PNorm = 63.2531, GNorm = 0.6802, lr_0 = 4.0914e-04
Loss = 9.0150e-02, PNorm = 63.2604, GNorm = 0.6775, lr_0 = 4.0886e-04
Loss = 7.9265e-02, PNorm = 63.2659, GNorm = 0.9146, lr_0 = 4.0858e-04
Loss = 7.9321e-02, PNorm = 63.2746, GNorm = 0.5504, lr_0 = 4.0830e-04
Loss = 7.4162e-02, PNorm = 63.2809, GNorm = 0.5343, lr_0 = 4.0802e-04
Loss = 7.3651e-02, PNorm = 63.2836, GNorm = 0.4827, lr_0 = 4.0774e-04
Loss = 8.5428e-02, PNorm = 63.2859, GNorm = 1.3103, lr_0 = 4.0746e-04
Loss = 7.8040e-02, PNorm = 63.2909, GNorm = 0.5921, lr_0 = 4.0718e-04
Loss = 8.6904e-02, PNorm = 63.2991, GNorm = 0.6032, lr_0 = 4.0691e-04
Loss = 8.8181e-02, PNorm = 63.3062, GNorm = 0.6545, lr_0 = 4.0663e-04
Loss = 8.2619e-02, PNorm = 63.3162, GNorm = 0.8797, lr_0 = 4.0635e-04
Loss = 7.4670e-02, PNorm = 63.3272, GNorm = 0.6737, lr_0 = 4.0607e-04
Loss = 8.9458e-02, PNorm = 63.3356, GNorm = 0.4814, lr_0 = 4.0579e-04
Loss = 8.6925e-02, PNorm = 63.3430, GNorm = 0.7902, lr_0 = 4.0551e-04
Loss = 8.5631e-02, PNorm = 63.3523, GNorm = 0.7393, lr_0 = 4.0524e-04
Loss = 7.2868e-02, PNorm = 63.3647, GNorm = 0.4799, lr_0 = 4.0496e-04
Loss = 9.8253e-02, PNorm = 63.3747, GNorm = 0.6561, lr_0 = 4.0468e-04
Validation mae = 0.396593
Epoch 13
Loss = 7.4415e-02, PNorm = 63.3823, GNorm = 0.6659, lr_0 = 4.0440e-04
Loss = 7.5644e-02, PNorm = 63.3921, GNorm = 0.4624, lr_0 = 4.0413e-04
Loss = 7.3447e-02, PNorm = 63.4041, GNorm = 1.0197, lr_0 = 4.0385e-04
Loss = 7.7688e-02, PNorm = 63.4155, GNorm = 0.8235, lr_0 = 4.0357e-04
Loss = 8.4499e-02, PNorm = 63.4247, GNorm = 0.5720, lr_0 = 4.0330e-04
Loss = 6.8826e-02, PNorm = 63.4342, GNorm = 0.4557, lr_0 = 4.0302e-04
Loss = 6.2846e-02, PNorm = 63.4413, GNorm = 0.5445, lr_0 = 4.0274e-04
Loss = 8.5736e-02, PNorm = 63.4492, GNorm = 0.4658, lr_0 = 4.0247e-04
Loss = 5.8531e-02, PNorm = 63.4570, GNorm = 0.7082, lr_0 = 4.0219e-04
Loss = 6.3032e-02, PNorm = 63.4643, GNorm = 0.5049, lr_0 = 4.0192e-04
Loss = 7.0354e-02, PNorm = 63.4714, GNorm = 0.6485, lr_0 = 4.0164e-04
Loss = 7.4682e-02, PNorm = 63.4758, GNorm = 0.8879, lr_0 = 4.0137e-04
Loss = 6.6617e-02, PNorm = 63.4831, GNorm = 0.6403, lr_0 = 4.0109e-04
Loss = 7.3887e-02, PNorm = 63.4888, GNorm = 0.5387, lr_0 = 4.0082e-04
Loss = 6.0623e-02, PNorm = 63.4967, GNorm = 0.5892, lr_0 = 4.0054e-04
Loss = 7.2423e-02, PNorm = 63.5033, GNorm = 0.4404, lr_0 = 4.0027e-04
Loss = 7.2442e-02, PNorm = 63.5110, GNorm = 0.9398, lr_0 = 3.9999e-04
Loss = 8.1598e-02, PNorm = 63.5174, GNorm = 1.1008, lr_0 = 3.9972e-04
Loss = 6.6235e-02, PNorm = 63.5265, GNorm = 0.4970, lr_0 = 3.9945e-04
Loss = 7.3038e-02, PNorm = 63.5308, GNorm = 0.8733, lr_0 = 3.9917e-04
Loss = 7.9203e-02, PNorm = 63.5409, GNorm = 0.6960, lr_0 = 3.9890e-04
Loss = 6.9677e-02, PNorm = 63.5490, GNorm = 0.5990, lr_0 = 3.9863e-04
Loss = 6.3220e-02, PNorm = 63.5592, GNorm = 0.4881, lr_0 = 3.9835e-04
Loss = 7.1122e-02, PNorm = 63.5690, GNorm = 0.8353, lr_0 = 3.9808e-04
Loss = 7.0015e-02, PNorm = 63.5777, GNorm = 0.9475, lr_0 = 3.9781e-04
Loss = 6.4904e-02, PNorm = 63.5850, GNorm = 1.4033, lr_0 = 3.9753e-04
Loss = 7.6603e-02, PNorm = 63.5944, GNorm = 0.6290, lr_0 = 3.9726e-04
Loss = 9.3844e-02, PNorm = 63.6064, GNorm = 0.7427, lr_0 = 3.9699e-04
Loss = 8.1004e-02, PNorm = 63.6190, GNorm = 0.7564, lr_0 = 3.9672e-04
Loss = 6.8362e-02, PNorm = 63.6287, GNorm = 0.6677, lr_0 = 3.9645e-04
Loss = 8.3116e-02, PNorm = 63.6356, GNorm = 0.6962, lr_0 = 3.9617e-04
Loss = 6.3994e-02, PNorm = 63.6450, GNorm = 0.7878, lr_0 = 3.9590e-04
Loss = 7.5710e-02, PNorm = 63.6541, GNorm = 0.6420, lr_0 = 3.9563e-04
Loss = 6.6151e-02, PNorm = 63.6620, GNorm = 0.6393, lr_0 = 3.9536e-04
Loss = 6.2979e-02, PNorm = 63.6688, GNorm = 0.5795, lr_0 = 3.9509e-04
Loss = 7.2133e-02, PNorm = 63.6755, GNorm = 0.5648, lr_0 = 3.9482e-04
Loss = 7.4715e-02, PNorm = 63.6847, GNorm = 0.5829, lr_0 = 3.9455e-04
Loss = 6.6203e-02, PNorm = 63.6947, GNorm = 0.7564, lr_0 = 3.9428e-04
Loss = 7.4262e-02, PNorm = 63.7055, GNorm = 0.8796, lr_0 = 3.9401e-04
Loss = 8.2235e-02, PNorm = 63.7113, GNorm = 0.7418, lr_0 = 3.9374e-04
Loss = 7.1512e-02, PNorm = 63.7159, GNorm = 0.4671, lr_0 = 3.9347e-04
Loss = 7.1406e-02, PNorm = 63.7258, GNorm = 0.5729, lr_0 = 3.9320e-04
Loss = 7.9319e-02, PNorm = 63.7333, GNorm = 0.8382, lr_0 = 3.9293e-04
Loss = 7.5458e-02, PNorm = 63.7445, GNorm = 0.9914, lr_0 = 3.9266e-04
Loss = 8.2848e-02, PNorm = 63.7575, GNorm = 0.6360, lr_0 = 3.9239e-04
Loss = 6.9889e-02, PNorm = 63.7674, GNorm = 0.5643, lr_0 = 3.9212e-04
Loss = 8.3873e-02, PNorm = 63.7759, GNorm = 0.8123, lr_0 = 3.9185e-04
Loss = 7.8087e-02, PNorm = 63.7850, GNorm = 0.6583, lr_0 = 3.9159e-04
Loss = 7.9685e-02, PNorm = 63.7917, GNorm = 0.6201, lr_0 = 3.9132e-04
Loss = 8.3391e-02, PNorm = 63.8001, GNorm = 0.6695, lr_0 = 3.9105e-04
Loss = 7.5558e-02, PNorm = 63.8105, GNorm = 0.7462, lr_0 = 3.9078e-04
Loss = 6.6083e-02, PNorm = 63.8193, GNorm = 0.6608, lr_0 = 3.9051e-04
Loss = 8.2582e-02, PNorm = 63.8269, GNorm = 0.8119, lr_0 = 3.9025e-04
Loss = 7.9584e-02, PNorm = 63.8373, GNorm = 0.8745, lr_0 = 3.8998e-04
Loss = 7.8965e-02, PNorm = 63.8489, GNorm = 0.7991, lr_0 = 3.8971e-04
Loss = 7.1272e-02, PNorm = 63.8571, GNorm = 0.9835, lr_0 = 3.8945e-04
Loss = 7.7892e-02, PNorm = 63.8642, GNorm = 0.7328, lr_0 = 3.8918e-04
Loss = 6.3186e-02, PNorm = 63.8748, GNorm = 1.0631, lr_0 = 3.8891e-04
Loss = 7.4103e-02, PNorm = 63.8853, GNorm = 0.9093, lr_0 = 3.8865e-04
Loss = 7.3298e-02, PNorm = 63.8937, GNorm = 0.6767, lr_0 = 3.8838e-04
Loss = 9.3455e-02, PNorm = 63.9027, GNorm = 0.8444, lr_0 = 3.8811e-04
Loss = 7.7803e-02, PNorm = 63.9122, GNorm = 0.8447, lr_0 = 3.8785e-04
Loss = 7.5224e-02, PNorm = 63.9255, GNorm = 0.7762, lr_0 = 3.8758e-04
Loss = 5.9509e-02, PNorm = 63.9354, GNorm = 0.6657, lr_0 = 3.8732e-04
Loss = 8.9516e-02, PNorm = 63.9394, GNorm = 0.6525, lr_0 = 3.8705e-04
Loss = 7.2941e-02, PNorm = 63.9415, GNorm = 0.5274, lr_0 = 3.8679e-04
Loss = 7.4824e-02, PNorm = 63.9464, GNorm = 0.7992, lr_0 = 3.8652e-04
Loss = 7.3077e-02, PNorm = 63.9553, GNorm = 0.6376, lr_0 = 3.8626e-04
Loss = 7.3146e-02, PNorm = 63.9620, GNorm = 0.9483, lr_0 = 3.8599e-04
Loss = 8.4749e-02, PNorm = 63.9688, GNorm = 0.5924, lr_0 = 3.8573e-04
Loss = 7.6586e-02, PNorm = 63.9757, GNorm = 0.6521, lr_0 = 3.8546e-04
Loss = 8.2529e-02, PNorm = 63.9849, GNorm = 0.8583, lr_0 = 3.8520e-04
Loss = 7.2028e-02, PNorm = 63.9926, GNorm = 0.5931, lr_0 = 3.8493e-04
Loss = 7.2149e-02, PNorm = 64.0020, GNorm = 0.4975, lr_0 = 3.8467e-04
Loss = 8.6311e-02, PNorm = 64.0110, GNorm = 0.9820, lr_0 = 3.8441e-04
Loss = 7.1076e-02, PNorm = 64.0223, GNorm = 0.7473, lr_0 = 3.8414e-04
Loss = 7.5913e-02, PNorm = 64.0336, GNorm = 0.4030, lr_0 = 3.8388e-04
Loss = 9.0514e-02, PNorm = 64.0455, GNorm = 1.0801, lr_0 = 3.8362e-04
Loss = 7.2161e-02, PNorm = 64.0527, GNorm = 0.4798, lr_0 = 3.8336e-04
Loss = 8.2418e-02, PNorm = 64.0608, GNorm = 0.6620, lr_0 = 3.8309e-04
Loss = 6.9774e-02, PNorm = 64.0663, GNorm = 0.4918, lr_0 = 3.8283e-04
Loss = 7.0884e-02, PNorm = 64.0743, GNorm = 0.7906, lr_0 = 3.8257e-04
Loss = 7.1810e-02, PNorm = 64.0809, GNorm = 0.7590, lr_0 = 3.8231e-04
Loss = 6.9318e-02, PNorm = 64.0864, GNorm = 0.5830, lr_0 = 3.8204e-04
Loss = 6.7100e-02, PNorm = 64.0924, GNorm = 0.4452, lr_0 = 3.8178e-04
Loss = 7.8438e-02, PNorm = 64.0983, GNorm = 0.5364, lr_0 = 3.8152e-04
Loss = 8.4609e-02, PNorm = 64.1043, GNorm = 0.4874, lr_0 = 3.8126e-04
Loss = 8.0131e-02, PNorm = 64.1105, GNorm = 0.6483, lr_0 = 3.8100e-04
Loss = 7.6300e-02, PNorm = 64.1172, GNorm = 1.0118, lr_0 = 3.8074e-04
Loss = 8.4106e-02, PNorm = 64.1244, GNorm = 1.0256, lr_0 = 3.8048e-04
Loss = 7.8234e-02, PNorm = 64.1373, GNorm = 0.8364, lr_0 = 3.8022e-04
Loss = 8.3292e-02, PNorm = 64.1491, GNorm = 0.5988, lr_0 = 3.7995e-04
Loss = 7.1963e-02, PNorm = 64.1557, GNorm = 0.6272, lr_0 = 3.7969e-04
Loss = 7.6286e-02, PNorm = 64.1609, GNorm = 0.9972, lr_0 = 3.7943e-04
Loss = 7.8120e-02, PNorm = 64.1706, GNorm = 0.6725, lr_0 = 3.7917e-04
Loss = 8.7607e-02, PNorm = 64.1825, GNorm = 0.6123, lr_0 = 3.7891e-04
Loss = 7.6344e-02, PNorm = 64.1922, GNorm = 0.7738, lr_0 = 3.7866e-04
Loss = 8.5134e-02, PNorm = 64.2021, GNorm = 0.7163, lr_0 = 3.7840e-04
Loss = 6.4143e-02, PNorm = 64.2124, GNorm = 0.5640, lr_0 = 3.7814e-04
Loss = 8.2198e-02, PNorm = 64.2212, GNorm = 0.6628, lr_0 = 3.7788e-04
Loss = 8.0237e-02, PNorm = 64.2269, GNorm = 0.5938, lr_0 = 3.7762e-04
Loss = 6.4105e-02, PNorm = 64.2311, GNorm = 0.5307, lr_0 = 3.7736e-04
Loss = 8.7052e-02, PNorm = 64.2418, GNorm = 0.7356, lr_0 = 3.7710e-04
Loss = 8.6820e-02, PNorm = 64.2509, GNorm = 0.8015, lr_0 = 3.7684e-04
Loss = 1.0369e-01, PNorm = 64.2605, GNorm = 1.0797, lr_0 = 3.7659e-04
Loss = 8.2956e-02, PNorm = 64.2698, GNorm = 0.6646, lr_0 = 3.7633e-04
Loss = 8.1137e-02, PNorm = 64.2806, GNorm = 0.6541, lr_0 = 3.7607e-04
Loss = 7.5914e-02, PNorm = 64.2861, GNorm = 0.6304, lr_0 = 3.7581e-04
Loss = 7.0414e-02, PNorm = 64.2924, GNorm = 0.6977, lr_0 = 3.7555e-04
Loss = 7.3376e-02, PNorm = 64.3025, GNorm = 0.4250, lr_0 = 3.7530e-04
Loss = 7.1740e-02, PNorm = 64.3115, GNorm = 0.8828, lr_0 = 3.7504e-04
Loss = 7.1240e-02, PNorm = 64.3206, GNorm = 0.4270, lr_0 = 3.7478e-04
Loss = 7.0514e-02, PNorm = 64.3271, GNorm = 0.6373, lr_0 = 3.7453e-04
Loss = 7.5099e-02, PNorm = 64.3299, GNorm = 0.7486, lr_0 = 3.7427e-04
Loss = 7.3302e-02, PNorm = 64.3359, GNorm = 0.5109, lr_0 = 3.7401e-04
Loss = 6.7581e-02, PNorm = 64.3402, GNorm = 0.4525, lr_0 = 3.7376e-04
Loss = 7.2369e-02, PNorm = 64.3470, GNorm = 0.9644, lr_0 = 3.7350e-04
Loss = 7.3495e-02, PNorm = 64.3528, GNorm = 0.5360, lr_0 = 3.7325e-04
Loss = 7.0017e-02, PNorm = 64.3571, GNorm = 0.5278, lr_0 = 3.7299e-04
Loss = 8.0010e-02, PNorm = 64.3634, GNorm = 0.6137, lr_0 = 3.7273e-04
Validation mae = 0.386392
Epoch 14
Loss = 7.2324e-02, PNorm = 64.3709, GNorm = 0.5745, lr_0 = 3.7248e-04
Loss = 7.8324e-02, PNorm = 64.3845, GNorm = 0.8223, lr_0 = 3.7222e-04
Loss = 6.7999e-02, PNorm = 64.3972, GNorm = 0.5906, lr_0 = 3.7197e-04
Loss = 5.8263e-02, PNorm = 64.4048, GNorm = 0.5618, lr_0 = 3.7171e-04
Loss = 6.7695e-02, PNorm = 64.4147, GNorm = 0.4063, lr_0 = 3.7146e-04
Loss = 6.8906e-02, PNorm = 64.4240, GNorm = 0.5257, lr_0 = 3.7120e-04
Loss = 6.8099e-02, PNorm = 64.4345, GNorm = 0.9416, lr_0 = 3.7095e-04
Loss = 7.0977e-02, PNorm = 64.4459, GNorm = 0.8252, lr_0 = 3.7070e-04
Loss = 7.0879e-02, PNorm = 64.4580, GNorm = 0.6955, lr_0 = 3.7044e-04
Loss = 7.4535e-02, PNorm = 64.4672, GNorm = 0.4691, lr_0 = 3.7019e-04
Loss = 6.3848e-02, PNorm = 64.4748, GNorm = 0.5463, lr_0 = 3.6993e-04
Loss = 6.2308e-02, PNorm = 64.4833, GNorm = 0.4111, lr_0 = 3.6968e-04
Loss = 5.8798e-02, PNorm = 64.4901, GNorm = 0.7248, lr_0 = 3.6943e-04
Loss = 6.5186e-02, PNorm = 64.4952, GNorm = 0.6377, lr_0 = 3.6917e-04
Loss = 7.8784e-02, PNorm = 64.5018, GNorm = 0.4662, lr_0 = 3.6892e-04
Loss = 5.8779e-02, PNorm = 64.5079, GNorm = 0.4220, lr_0 = 3.6867e-04
Loss = 5.9992e-02, PNorm = 64.5155, GNorm = 0.5912, lr_0 = 3.6842e-04
Loss = 7.1101e-02, PNorm = 64.5216, GNorm = 0.7051, lr_0 = 3.6816e-04
Loss = 6.6843e-02, PNorm = 64.5288, GNorm = 0.6138, lr_0 = 3.6791e-04
Loss = 7.7076e-02, PNorm = 64.5376, GNorm = 1.1558, lr_0 = 3.6766e-04
Loss = 7.5346e-02, PNorm = 64.5510, GNorm = 0.6694, lr_0 = 3.6741e-04
Loss = 7.2813e-02, PNorm = 64.5628, GNorm = 0.9780, lr_0 = 3.6716e-04
Loss = 7.5593e-02, PNorm = 64.5713, GNorm = 0.8226, lr_0 = 3.6690e-04
Loss = 6.4826e-02, PNorm = 64.5772, GNorm = 0.6852, lr_0 = 3.6665e-04
Loss = 6.7587e-02, PNorm = 64.5846, GNorm = 0.7060, lr_0 = 3.6640e-04
Loss = 7.2929e-02, PNorm = 64.5918, GNorm = 0.8320, lr_0 = 3.6615e-04
Loss = 6.9681e-02, PNorm = 64.6020, GNorm = 0.5869, lr_0 = 3.6590e-04
Loss = 7.4508e-02, PNorm = 64.6119, GNorm = 0.6567, lr_0 = 3.6565e-04
Loss = 6.7799e-02, PNorm = 64.6175, GNorm = 0.5050, lr_0 = 3.6540e-04
Loss = 7.0808e-02, PNorm = 64.6229, GNorm = 0.5809, lr_0 = 3.6515e-04
Loss = 7.4219e-02, PNorm = 64.6292, GNorm = 0.6152, lr_0 = 3.6490e-04
Loss = 7.6544e-02, PNorm = 64.6361, GNorm = 0.6573, lr_0 = 3.6465e-04
Loss = 7.2922e-02, PNorm = 64.6435, GNorm = 0.7030, lr_0 = 3.6440e-04
Loss = 6.8276e-02, PNorm = 64.6537, GNorm = 1.0210, lr_0 = 3.6415e-04
Loss = 6.7025e-02, PNorm = 64.6633, GNorm = 0.5713, lr_0 = 3.6390e-04
Loss = 6.8657e-02, PNorm = 64.6719, GNorm = 0.8297, lr_0 = 3.6365e-04
Loss = 7.0855e-02, PNorm = 64.6788, GNorm = 0.4907, lr_0 = 3.6340e-04
Loss = 6.6711e-02, PNorm = 64.6876, GNorm = 0.5884, lr_0 = 3.6315e-04
Loss = 7.5974e-02, PNorm = 64.6956, GNorm = 0.4425, lr_0 = 3.6290e-04
Loss = 6.2897e-02, PNorm = 64.7007, GNorm = 0.8206, lr_0 = 3.6266e-04
Loss = 7.5776e-02, PNorm = 64.7078, GNorm = 0.6039, lr_0 = 3.6241e-04
Loss = 6.5005e-02, PNorm = 64.7147, GNorm = 0.5638, lr_0 = 3.6216e-04
Loss = 7.1892e-02, PNorm = 64.7207, GNorm = 0.4790, lr_0 = 3.6191e-04
Loss = 6.9013e-02, PNorm = 64.7265, GNorm = 0.6652, lr_0 = 3.6166e-04
Loss = 7.2217e-02, PNorm = 64.7326, GNorm = 0.3873, lr_0 = 3.6141e-04
Loss = 6.6864e-02, PNorm = 64.7398, GNorm = 0.4951, lr_0 = 3.6117e-04
Loss = 6.2353e-02, PNorm = 64.7466, GNorm = 0.5959, lr_0 = 3.6092e-04
Loss = 6.9678e-02, PNorm = 64.7544, GNorm = 0.4373, lr_0 = 3.6067e-04
Loss = 6.5979e-02, PNorm = 64.7634, GNorm = 0.5040, lr_0 = 3.6043e-04
Loss = 6.3495e-02, PNorm = 64.7719, GNorm = 0.8368, lr_0 = 3.6018e-04
Loss = 7.2985e-02, PNorm = 64.7775, GNorm = 1.1467, lr_0 = 3.5993e-04
Loss = 6.8247e-02, PNorm = 64.7844, GNorm = 0.4539, lr_0 = 3.5969e-04
Loss = 7.4962e-02, PNorm = 64.7953, GNorm = 0.6466, lr_0 = 3.5944e-04
Loss = 6.4055e-02, PNorm = 64.8035, GNorm = 0.6006, lr_0 = 3.5919e-04
Loss = 6.8545e-02, PNorm = 64.8145, GNorm = 0.7594, lr_0 = 3.5895e-04
Loss = 7.7178e-02, PNorm = 64.8264, GNorm = 0.6058, lr_0 = 3.5870e-04
Loss = 6.2730e-02, PNorm = 64.8345, GNorm = 0.5035, lr_0 = 3.5845e-04
Loss = 7.5177e-02, PNorm = 64.8408, GNorm = 0.6309, lr_0 = 3.5821e-04
Loss = 7.5864e-02, PNorm = 64.8441, GNorm = 0.4722, lr_0 = 3.5796e-04
Loss = 7.7487e-02, PNorm = 64.8464, GNorm = 0.5991, lr_0 = 3.5772e-04
Loss = 6.7374e-02, PNorm = 64.8557, GNorm = 0.6404, lr_0 = 3.5747e-04
Loss = 6.8387e-02, PNorm = 64.8628, GNorm = 0.5171, lr_0 = 3.5723e-04
Loss = 6.2841e-02, PNorm = 64.8679, GNorm = 0.9711, lr_0 = 3.5698e-04
Loss = 6.4896e-02, PNorm = 64.8761, GNorm = 0.6093, lr_0 = 3.5674e-04
Loss = 6.5919e-02, PNorm = 64.8844, GNorm = 0.4409, lr_0 = 3.5650e-04
Loss = 6.6348e-02, PNorm = 64.8905, GNorm = 0.6236, lr_0 = 3.5625e-04
Loss = 7.2693e-02, PNorm = 64.8989, GNorm = 0.5309, lr_0 = 3.5601e-04
Loss = 7.7884e-02, PNorm = 64.9091, GNorm = 0.4442, lr_0 = 3.5576e-04
Loss = 6.2810e-02, PNorm = 64.9173, GNorm = 0.4619, lr_0 = 3.5552e-04
Loss = 6.7656e-02, PNorm = 64.9243, GNorm = 0.6752, lr_0 = 3.5528e-04
Loss = 6.7016e-02, PNorm = 64.9341, GNorm = 0.6481, lr_0 = 3.5503e-04
Loss = 7.1700e-02, PNorm = 64.9380, GNorm = 0.6760, lr_0 = 3.5479e-04
Loss = 6.6976e-02, PNorm = 64.9423, GNorm = 0.6233, lr_0 = 3.5455e-04
Loss = 7.0256e-02, PNorm = 64.9477, GNorm = 0.6687, lr_0 = 3.5430e-04
Loss = 8.0479e-02, PNorm = 64.9592, GNorm = 0.6896, lr_0 = 3.5406e-04
Loss = 6.7046e-02, PNorm = 64.9693, GNorm = 0.5480, lr_0 = 3.5382e-04
Loss = 6.2047e-02, PNorm = 64.9777, GNorm = 0.4445, lr_0 = 3.5358e-04
Loss = 7.0541e-02, PNorm = 64.9833, GNorm = 0.4466, lr_0 = 3.5333e-04
Loss = 8.1440e-02, PNorm = 64.9875, GNorm = 0.7714, lr_0 = 3.5309e-04
Loss = 7.6800e-02, PNorm = 65.0000, GNorm = 0.5602, lr_0 = 3.5285e-04
Loss = 7.6718e-02, PNorm = 65.0096, GNorm = 0.9625, lr_0 = 3.5261e-04
Loss = 5.9814e-02, PNorm = 65.0162, GNorm = 0.5214, lr_0 = 3.5237e-04
Loss = 7.8212e-02, PNorm = 65.0201, GNorm = 0.6030, lr_0 = 3.5212e-04
Loss = 7.7976e-02, PNorm = 65.0260, GNorm = 0.7465, lr_0 = 3.5188e-04
Loss = 7.0381e-02, PNorm = 65.0347, GNorm = 0.8056, lr_0 = 3.5164e-04
Loss = 7.7742e-02, PNorm = 65.0411, GNorm = 0.6837, lr_0 = 3.5140e-04
Loss = 7.1567e-02, PNorm = 65.0463, GNorm = 0.4532, lr_0 = 3.5116e-04
Loss = 6.8913e-02, PNorm = 65.0534, GNorm = 0.9966, lr_0 = 3.5092e-04
Loss = 6.2486e-02, PNorm = 65.0608, GNorm = 0.7829, lr_0 = 3.5068e-04
Loss = 7.3928e-02, PNorm = 65.0674, GNorm = 0.4942, lr_0 = 3.5044e-04
Loss = 6.8460e-02, PNorm = 65.0740, GNorm = 0.7428, lr_0 = 3.5020e-04
Loss = 6.7476e-02, PNorm = 65.0820, GNorm = 0.6746, lr_0 = 3.4996e-04
Loss = 7.9150e-02, PNorm = 65.0857, GNorm = 0.5518, lr_0 = 3.4972e-04
Loss = 8.2250e-02, PNorm = 65.0893, GNorm = 0.8950, lr_0 = 3.4948e-04
Loss = 7.8972e-02, PNorm = 65.0966, GNorm = 0.9217, lr_0 = 3.4924e-04
Loss = 7.2067e-02, PNorm = 65.1061, GNorm = 0.7368, lr_0 = 3.4900e-04
Loss = 7.1980e-02, PNorm = 65.1146, GNorm = 0.7399, lr_0 = 3.4876e-04
Loss = 6.1222e-02, PNorm = 65.1229, GNorm = 0.6375, lr_0 = 3.4852e-04
Loss = 7.4643e-02, PNorm = 65.1295, GNorm = 0.3662, lr_0 = 3.4828e-04
Loss = 6.7178e-02, PNorm = 65.1370, GNorm = 0.5381, lr_0 = 3.4805e-04
Loss = 6.9323e-02, PNorm = 65.1429, GNorm = 0.6147, lr_0 = 3.4781e-04
Loss = 6.7815e-02, PNorm = 65.1481, GNorm = 0.4637, lr_0 = 3.4757e-04
Loss = 6.6802e-02, PNorm = 65.1536, GNorm = 0.8437, lr_0 = 3.4733e-04
Loss = 7.9381e-02, PNorm = 65.1618, GNorm = 0.7578, lr_0 = 3.4709e-04
Loss = 6.0870e-02, PNorm = 65.1688, GNorm = 0.4672, lr_0 = 3.4686e-04
Loss = 7.6275e-02, PNorm = 65.1734, GNorm = 0.6846, lr_0 = 3.4662e-04
Loss = 6.9261e-02, PNorm = 65.1788, GNorm = 0.4290, lr_0 = 3.4638e-04
Loss = 6.6056e-02, PNorm = 65.1859, GNorm = 0.4747, lr_0 = 3.4614e-04
Loss = 7.1766e-02, PNorm = 65.1904, GNorm = 0.7588, lr_0 = 3.4591e-04
Loss = 6.8940e-02, PNorm = 65.1954, GNorm = 1.0888, lr_0 = 3.4567e-04
Loss = 6.9712e-02, PNorm = 65.2040, GNorm = 0.8618, lr_0 = 3.4543e-04
Loss = 7.2133e-02, PNorm = 65.2096, GNorm = 0.5985, lr_0 = 3.4520e-04
Loss = 7.8930e-02, PNorm = 65.2145, GNorm = 0.6275, lr_0 = 3.4496e-04
Loss = 7.4713e-02, PNorm = 65.2212, GNorm = 0.6840, lr_0 = 3.4472e-04
Loss = 8.5668e-02, PNorm = 65.2286, GNorm = 0.4778, lr_0 = 3.4449e-04
Loss = 7.2209e-02, PNorm = 65.2353, GNorm = 0.5726, lr_0 = 3.4425e-04
Loss = 8.9114e-02, PNorm = 65.2450, GNorm = 1.4775, lr_0 = 3.4402e-04
Loss = 7.5157e-02, PNorm = 65.2571, GNorm = 0.4486, lr_0 = 3.4378e-04
Loss = 7.7361e-02, PNorm = 65.2657, GNorm = 0.7399, lr_0 = 3.4354e-04
Loss = 5.7684e-02, PNorm = 65.2728, GNorm = 0.5515, lr_0 = 3.4331e-04
Validation mae = 0.390865
Epoch 15
Loss = 7.0204e-02, PNorm = 65.2794, GNorm = 1.1298, lr_0 = 3.4307e-04
Loss = 6.3911e-02, PNorm = 65.2848, GNorm = 0.5584, lr_0 = 3.4284e-04
Loss = 5.6471e-02, PNorm = 65.2913, GNorm = 0.7053, lr_0 = 3.4260e-04
Loss = 6.6827e-02, PNorm = 65.2993, GNorm = 0.6795, lr_0 = 3.4237e-04
Loss = 6.7247e-02, PNorm = 65.3043, GNorm = 0.5176, lr_0 = 3.4213e-04
Loss = 6.7184e-02, PNorm = 65.3126, GNorm = 0.6448, lr_0 = 3.4190e-04
Loss = 5.8761e-02, PNorm = 65.3205, GNorm = 0.7821, lr_0 = 3.4167e-04
Loss = 5.8586e-02, PNorm = 65.3265, GNorm = 0.4936, lr_0 = 3.4143e-04
Loss = 6.2057e-02, PNorm = 65.3342, GNorm = 0.5969, lr_0 = 3.4120e-04
Loss = 6.1737e-02, PNorm = 65.3437, GNorm = 0.5087, lr_0 = 3.4096e-04
Loss = 6.4507e-02, PNorm = 65.3542, GNorm = 0.4635, lr_0 = 3.4073e-04
Loss = 4.9785e-02, PNorm = 65.3622, GNorm = 0.4455, lr_0 = 3.4050e-04
Loss = 5.8221e-02, PNorm = 65.3684, GNorm = 0.5957, lr_0 = 3.4026e-04
Loss = 5.9803e-02, PNorm = 65.3752, GNorm = 0.5631, lr_0 = 3.4003e-04
Loss = 6.1837e-02, PNorm = 65.3817, GNorm = 0.7819, lr_0 = 3.3980e-04
Loss = 6.1857e-02, PNorm = 65.3878, GNorm = 0.6144, lr_0 = 3.3956e-04
Loss = 6.2361e-02, PNorm = 65.3943, GNorm = 0.6089, lr_0 = 3.3933e-04
Loss = 5.6065e-02, PNorm = 65.4004, GNorm = 0.7010, lr_0 = 3.3910e-04
Loss = 6.8902e-02, PNorm = 65.4056, GNorm = 0.5298, lr_0 = 3.3887e-04
Loss = 6.5300e-02, PNorm = 65.4136, GNorm = 0.6204, lr_0 = 3.3864e-04
Loss = 6.8106e-02, PNorm = 65.4184, GNorm = 0.4882, lr_0 = 3.3840e-04
Loss = 5.7371e-02, PNorm = 65.4257, GNorm = 0.4611, lr_0 = 3.3817e-04
Loss = 7.3229e-02, PNorm = 65.4343, GNorm = 0.5158, lr_0 = 3.3794e-04
Loss = 6.5747e-02, PNorm = 65.4419, GNorm = 0.6374, lr_0 = 3.3771e-04
Loss = 6.7824e-02, PNorm = 65.4465, GNorm = 0.6142, lr_0 = 3.3748e-04
Loss = 6.3878e-02, PNorm = 65.4524, GNorm = 0.4829, lr_0 = 3.3725e-04
Loss = 6.5036e-02, PNorm = 65.4584, GNorm = 0.5855, lr_0 = 3.3701e-04
Loss = 8.0333e-02, PNorm = 65.4647, GNorm = 0.5743, lr_0 = 3.3678e-04
Loss = 6.1781e-02, PNorm = 65.4711, GNorm = 0.6152, lr_0 = 3.3655e-04
Loss = 5.8789e-02, PNorm = 65.4790, GNorm = 0.5665, lr_0 = 3.3632e-04
Loss = 5.6184e-02, PNorm = 65.4848, GNorm = 0.6513, lr_0 = 3.3609e-04
Loss = 6.3097e-02, PNorm = 65.4897, GNorm = 0.4264, lr_0 = 3.3586e-04
Loss = 6.2630e-02, PNorm = 65.4990, GNorm = 0.6925, lr_0 = 3.3563e-04
Loss = 6.7953e-02, PNorm = 65.5101, GNorm = 0.4523, lr_0 = 3.3540e-04
Loss = 6.9839e-02, PNorm = 65.5194, GNorm = 0.5463, lr_0 = 3.3517e-04
Loss = 6.4296e-02, PNorm = 65.5292, GNorm = 0.5986, lr_0 = 3.3494e-04
Loss = 7.7647e-02, PNorm = 65.5373, GNorm = 0.5056, lr_0 = 3.3471e-04
Loss = 5.6716e-02, PNorm = 65.5415, GNorm = 0.5681, lr_0 = 3.3448e-04
Loss = 5.4711e-02, PNorm = 65.5467, GNorm = 0.3777, lr_0 = 3.3425e-04
Loss = 5.8119e-02, PNorm = 65.5564, GNorm = 0.5905, lr_0 = 3.3403e-04
Loss = 5.8125e-02, PNorm = 65.5649, GNorm = 0.6205, lr_0 = 3.3380e-04
Loss = 6.9145e-02, PNorm = 65.5701, GNorm = 0.7620, lr_0 = 3.3357e-04
Loss = 6.2498e-02, PNorm = 65.5741, GNorm = 0.8965, lr_0 = 3.3334e-04
Loss = 6.8545e-02, PNorm = 65.5791, GNorm = 0.7846, lr_0 = 3.3311e-04
Loss = 6.4077e-02, PNorm = 65.5855, GNorm = 0.9149, lr_0 = 3.3288e-04
Loss = 6.1544e-02, PNorm = 65.5891, GNorm = 0.5508, lr_0 = 3.3265e-04
Loss = 7.0836e-02, PNorm = 65.5951, GNorm = 0.4509, lr_0 = 3.3243e-04
Loss = 6.8093e-02, PNorm = 65.6030, GNorm = 1.0485, lr_0 = 3.3220e-04
Loss = 7.3056e-02, PNorm = 65.6090, GNorm = 0.5854, lr_0 = 3.3197e-04
Loss = 6.9036e-02, PNorm = 65.6137, GNorm = 1.0058, lr_0 = 3.3174e-04
Loss = 6.2602e-02, PNorm = 65.6194, GNorm = 0.7099, lr_0 = 3.3152e-04
Loss = 5.9435e-02, PNorm = 65.6251, GNorm = 0.6439, lr_0 = 3.3129e-04
Loss = 7.0160e-02, PNorm = 65.6355, GNorm = 0.5242, lr_0 = 3.3106e-04
Loss = 6.6636e-02, PNorm = 65.6437, GNorm = 0.6349, lr_0 = 3.3084e-04
Loss = 6.1647e-02, PNorm = 65.6507, GNorm = 0.6731, lr_0 = 3.3061e-04
Loss = 7.2647e-02, PNorm = 65.6590, GNorm = 0.4376, lr_0 = 3.3038e-04
Loss = 6.5719e-02, PNorm = 65.6639, GNorm = 0.5523, lr_0 = 3.3016e-04
Loss = 6.5849e-02, PNorm = 65.6707, GNorm = 0.6088, lr_0 = 3.2993e-04
Loss = 6.4240e-02, PNorm = 65.6784, GNorm = 0.7192, lr_0 = 3.2970e-04
Loss = 6.5822e-02, PNorm = 65.6840, GNorm = 0.4378, lr_0 = 3.2948e-04
Loss = 6.7447e-02, PNorm = 65.6888, GNorm = 0.4714, lr_0 = 3.2925e-04
Loss = 7.2520e-02, PNorm = 65.6952, GNorm = 0.7712, lr_0 = 3.2903e-04
Loss = 6.0364e-02, PNorm = 65.7001, GNorm = 0.5094, lr_0 = 3.2880e-04
Loss = 7.5709e-02, PNorm = 65.7086, GNorm = 0.6186, lr_0 = 3.2858e-04
Loss = 6.2460e-02, PNorm = 65.7160, GNorm = 0.6005, lr_0 = 3.2835e-04
Loss = 6.2624e-02, PNorm = 65.7232, GNorm = 0.5518, lr_0 = 3.2813e-04
Loss = 7.6440e-02, PNorm = 65.7303, GNorm = 0.8145, lr_0 = 3.2790e-04
Loss = 5.7551e-02, PNorm = 65.7391, GNorm = 0.8590, lr_0 = 3.2768e-04
Loss = 6.3114e-02, PNorm = 65.7486, GNorm = 0.7106, lr_0 = 3.2745e-04
Loss = 6.7353e-02, PNorm = 65.7559, GNorm = 0.6142, lr_0 = 3.2723e-04
Loss = 6.0028e-02, PNorm = 65.7613, GNorm = 0.5042, lr_0 = 3.2700e-04
Loss = 6.2173e-02, PNorm = 65.7646, GNorm = 0.4798, lr_0 = 3.2678e-04
Loss = 6.2290e-02, PNorm = 65.7697, GNorm = 0.4154, lr_0 = 3.2656e-04
Loss = 6.2984e-02, PNorm = 65.7760, GNorm = 0.4875, lr_0 = 3.2633e-04
Loss = 7.2940e-02, PNorm = 65.7824, GNorm = 0.6517, lr_0 = 3.2611e-04
Loss = 6.3995e-02, PNorm = 65.7914, GNorm = 0.4601, lr_0 = 3.2589e-04
Loss = 7.2607e-02, PNorm = 65.8005, GNorm = 0.5522, lr_0 = 3.2566e-04
Loss = 6.7811e-02, PNorm = 65.8091, GNorm = 0.4671, lr_0 = 3.2544e-04
Loss = 6.3410e-02, PNorm = 65.8134, GNorm = 0.7142, lr_0 = 3.2522e-04
Loss = 5.7658e-02, PNorm = 65.8162, GNorm = 0.8401, lr_0 = 3.2499e-04
Loss = 6.4564e-02, PNorm = 65.8181, GNorm = 0.9307, lr_0 = 3.2477e-04
Loss = 6.2324e-02, PNorm = 65.8234, GNorm = 0.9770, lr_0 = 3.2455e-04
Loss = 7.0078e-02, PNorm = 65.8302, GNorm = 0.5433, lr_0 = 3.2433e-04
Loss = 6.9334e-02, PNorm = 65.8366, GNorm = 0.6068, lr_0 = 3.2410e-04
Loss = 6.3938e-02, PNorm = 65.8440, GNorm = 0.8151, lr_0 = 3.2388e-04
Loss = 7.2165e-02, PNorm = 65.8507, GNorm = 0.5394, lr_0 = 3.2366e-04
Loss = 7.2090e-02, PNorm = 65.8546, GNorm = 1.0099, lr_0 = 3.2344e-04
Loss = 6.5101e-02, PNorm = 65.8602, GNorm = 0.7983, lr_0 = 3.2322e-04
Loss = 6.8576e-02, PNorm = 65.8653, GNorm = 0.4883, lr_0 = 3.2300e-04
Loss = 8.0888e-02, PNorm = 65.8727, GNorm = 0.4462, lr_0 = 3.2277e-04
Loss = 6.6458e-02, PNorm = 65.8808, GNorm = 0.4765, lr_0 = 3.2255e-04
Loss = 6.5084e-02, PNorm = 65.8852, GNorm = 0.7895, lr_0 = 3.2233e-04
Loss = 7.6525e-02, PNorm = 65.8894, GNorm = 0.5682, lr_0 = 3.2211e-04
Loss = 6.3479e-02, PNorm = 65.8965, GNorm = 0.4909, lr_0 = 3.2189e-04
Loss = 6.6791e-02, PNorm = 65.9033, GNorm = 0.5189, lr_0 = 3.2167e-04
Loss = 7.7144e-02, PNorm = 65.9095, GNorm = 0.7009, lr_0 = 3.2145e-04
Loss = 7.1441e-02, PNorm = 65.9179, GNorm = 0.9308, lr_0 = 3.2123e-04
Loss = 6.8698e-02, PNorm = 65.9259, GNorm = 0.6880, lr_0 = 3.2101e-04
Loss = 6.4807e-02, PNorm = 65.9358, GNorm = 0.6723, lr_0 = 3.2079e-04
Loss = 5.7676e-02, PNorm = 65.9407, GNorm = 0.5172, lr_0 = 3.2057e-04
Loss = 5.7821e-02, PNorm = 65.9449, GNorm = 0.4534, lr_0 = 3.2035e-04
Loss = 6.2347e-02, PNorm = 65.9506, GNorm = 0.5477, lr_0 = 3.2013e-04
Loss = 6.5241e-02, PNorm = 65.9578, GNorm = 1.0419, lr_0 = 3.1991e-04
Loss = 6.7264e-02, PNorm = 65.9664, GNorm = 0.4941, lr_0 = 3.1969e-04
Loss = 7.0208e-02, PNorm = 65.9750, GNorm = 0.8199, lr_0 = 3.1947e-04
Loss = 6.0764e-02, PNorm = 65.9842, GNorm = 0.8136, lr_0 = 3.1925e-04
Loss = 6.1279e-02, PNorm = 65.9925, GNorm = 0.6851, lr_0 = 3.1904e-04
Loss = 6.7881e-02, PNorm = 66.0008, GNorm = 1.0358, lr_0 = 3.1882e-04
Loss = 6.6794e-02, PNorm = 66.0070, GNorm = 0.4846, lr_0 = 3.1860e-04
Loss = 7.8976e-02, PNorm = 66.0113, GNorm = 0.6222, lr_0 = 3.1838e-04
Loss = 6.5261e-02, PNorm = 66.0178, GNorm = 0.5278, lr_0 = 3.1816e-04
Loss = 6.5823e-02, PNorm = 66.0256, GNorm = 0.7068, lr_0 = 3.1794e-04
Loss = 6.9598e-02, PNorm = 66.0328, GNorm = 0.5316, lr_0 = 3.1773e-04
Loss = 6.4612e-02, PNorm = 66.0369, GNorm = 0.5882, lr_0 = 3.1751e-04
Loss = 7.1029e-02, PNorm = 66.0404, GNorm = 0.6856, lr_0 = 3.1729e-04
Loss = 6.2619e-02, PNorm = 66.0460, GNorm = 0.6675, lr_0 = 3.1707e-04
Loss = 6.4533e-02, PNorm = 66.0532, GNorm = 0.7267, lr_0 = 3.1686e-04
Loss = 7.0751e-02, PNorm = 66.0580, GNorm = 0.7001, lr_0 = 3.1664e-04
Loss = 6.0853e-02, PNorm = 66.0633, GNorm = 0.3349, lr_0 = 3.1642e-04
Loss = 6.1426e-02, PNorm = 66.0666, GNorm = 0.5502, lr_0 = 3.1621e-04
Validation mae = 0.392090
Epoch 16
Loss = 5.6941e-02, PNorm = 66.0727, GNorm = 0.4322, lr_0 = 3.1599e-04
Loss = 5.4591e-02, PNorm = 66.0812, GNorm = 0.4734, lr_0 = 3.1577e-04
Loss = 5.8685e-02, PNorm = 66.0880, GNorm = 0.5966, lr_0 = 3.1556e-04
Loss = 6.0169e-02, PNorm = 66.0938, GNorm = 0.4476, lr_0 = 3.1534e-04
Loss = 5.9508e-02, PNorm = 66.0965, GNorm = 0.4374, lr_0 = 3.1512e-04
Loss = 5.6229e-02, PNorm = 66.1003, GNorm = 0.7502, lr_0 = 3.1491e-04
Loss = 4.7445e-02, PNorm = 66.1055, GNorm = 0.4563, lr_0 = 3.1469e-04
Loss = 5.3610e-02, PNorm = 66.1119, GNorm = 0.4914, lr_0 = 3.1448e-04
Loss = 6.2096e-02, PNorm = 66.1201, GNorm = 0.5375, lr_0 = 3.1426e-04
Loss = 5.4201e-02, PNorm = 66.1266, GNorm = 0.5139, lr_0 = 3.1405e-04
Loss = 6.1499e-02, PNorm = 66.1322, GNorm = 0.6112, lr_0 = 3.1383e-04
Loss = 5.5896e-02, PNorm = 66.1349, GNorm = 0.5302, lr_0 = 3.1362e-04
Loss = 5.9192e-02, PNorm = 66.1418, GNorm = 0.9462, lr_0 = 3.1340e-04
Loss = 6.6266e-02, PNorm = 66.1488, GNorm = 0.4543, lr_0 = 3.1319e-04
Loss = 4.9294e-02, PNorm = 66.1555, GNorm = 0.5966, lr_0 = 3.1297e-04
Loss = 5.8727e-02, PNorm = 66.1633, GNorm = 0.5048, lr_0 = 3.1276e-04
Loss = 5.7979e-02, PNorm = 66.1697, GNorm = 0.4773, lr_0 = 3.1254e-04
Loss = 6.7785e-02, PNorm = 66.1741, GNorm = 0.7012, lr_0 = 3.1233e-04
Loss = 6.0579e-02, PNorm = 66.1801, GNorm = 0.4971, lr_0 = 3.1212e-04
Loss = 5.6966e-02, PNorm = 66.1857, GNorm = 0.5363, lr_0 = 3.1190e-04
Loss = 6.3368e-02, PNorm = 66.1920, GNorm = 0.7890, lr_0 = 3.1169e-04
Loss = 6.3595e-02, PNorm = 66.2003, GNorm = 0.5771, lr_0 = 3.1147e-04
Loss = 5.9703e-02, PNorm = 66.2074, GNorm = 1.1729, lr_0 = 3.1126e-04
Loss = 6.6720e-02, PNorm = 66.2131, GNorm = 0.6901, lr_0 = 3.1105e-04
Loss = 5.9329e-02, PNorm = 66.2196, GNorm = 0.4774, lr_0 = 3.1083e-04
Loss = 5.5662e-02, PNorm = 66.2254, GNorm = 0.5002, lr_0 = 3.1062e-04
Loss = 5.9648e-02, PNorm = 66.2303, GNorm = 0.5680, lr_0 = 3.1041e-04
Loss = 5.4481e-02, PNorm = 66.2348, GNorm = 0.5189, lr_0 = 3.1020e-04
Loss = 6.4419e-02, PNorm = 66.2393, GNorm = 0.5062, lr_0 = 3.0998e-04
Loss = 6.5556e-02, PNorm = 66.2456, GNorm = 0.5604, lr_0 = 3.0977e-04
Loss = 5.8272e-02, PNorm = 66.2520, GNorm = 0.6614, lr_0 = 3.0956e-04
Loss = 6.0872e-02, PNorm = 66.2579, GNorm = 0.6487, lr_0 = 3.0935e-04
Loss = 6.3860e-02, PNorm = 66.2624, GNorm = 0.5344, lr_0 = 3.0914e-04
Loss = 7.0919e-02, PNorm = 66.2684, GNorm = 0.5687, lr_0 = 3.0892e-04
Loss = 5.8184e-02, PNorm = 66.2776, GNorm = 0.8357, lr_0 = 3.0871e-04
Loss = 6.9798e-02, PNorm = 66.2838, GNorm = 0.7721, lr_0 = 3.0850e-04
Loss = 6.2234e-02, PNorm = 66.2879, GNorm = 0.5007, lr_0 = 3.0829e-04
Loss = 5.4703e-02, PNorm = 66.2930, GNorm = 0.6950, lr_0 = 3.0808e-04
Loss = 5.4916e-02, PNorm = 66.2982, GNorm = 0.4448, lr_0 = 3.0787e-04
Loss = 5.6344e-02, PNorm = 66.3042, GNorm = 0.5804, lr_0 = 3.0766e-04
Loss = 7.0223e-02, PNorm = 66.3120, GNorm = 0.5605, lr_0 = 3.0745e-04
Loss = 6.0046e-02, PNorm = 66.3183, GNorm = 0.5104, lr_0 = 3.0723e-04
Loss = 6.0502e-02, PNorm = 66.3243, GNorm = 0.4923, lr_0 = 3.0702e-04
Loss = 5.6228e-02, PNorm = 66.3293, GNorm = 0.6688, lr_0 = 3.0681e-04
Loss = 5.8355e-02, PNorm = 66.3363, GNorm = 0.8073, lr_0 = 3.0660e-04
Loss = 6.7645e-02, PNorm = 66.3424, GNorm = 0.5950, lr_0 = 3.0639e-04
Loss = 7.3947e-02, PNorm = 66.3508, GNorm = 1.5440, lr_0 = 3.0618e-04
Loss = 7.6548e-02, PNorm = 66.3582, GNorm = 1.1565, lr_0 = 3.0597e-04
Loss = 6.7117e-02, PNorm = 66.3640, GNorm = 0.7494, lr_0 = 3.0576e-04
Loss = 7.6304e-02, PNorm = 66.3715, GNorm = 0.5260, lr_0 = 3.0555e-04
Loss = 6.0052e-02, PNorm = 66.3757, GNorm = 0.5374, lr_0 = 3.0535e-04
Loss = 6.0627e-02, PNorm = 66.3829, GNorm = 0.4789, lr_0 = 3.0514e-04
Loss = 6.2102e-02, PNorm = 66.3871, GNorm = 0.5786, lr_0 = 3.0493e-04
Loss = 6.6373e-02, PNorm = 66.3939, GNorm = 0.7358, lr_0 = 3.0472e-04
Loss = 6.2227e-02, PNorm = 66.4008, GNorm = 0.8505, lr_0 = 3.0451e-04
Loss = 6.8862e-02, PNorm = 66.4091, GNorm = 0.3984, lr_0 = 3.0430e-04
Loss = 6.1055e-02, PNorm = 66.4176, GNorm = 0.8332, lr_0 = 3.0409e-04
Loss = 7.2620e-02, PNorm = 66.4255, GNorm = 0.6692, lr_0 = 3.0388e-04
Loss = 5.9681e-02, PNorm = 66.4307, GNorm = 0.5599, lr_0 = 3.0368e-04
Loss = 6.0208e-02, PNorm = 66.4352, GNorm = 0.7490, lr_0 = 3.0347e-04
Loss = 5.7756e-02, PNorm = 66.4408, GNorm = 0.9120, lr_0 = 3.0326e-04
Loss = 6.9066e-02, PNorm = 66.4450, GNorm = 0.7783, lr_0 = 3.0305e-04
Loss = 5.9968e-02, PNorm = 66.4515, GNorm = 0.6680, lr_0 = 3.0284e-04
Loss = 6.4546e-02, PNorm = 66.4568, GNorm = 0.9195, lr_0 = 3.0264e-04
Loss = 5.9926e-02, PNorm = 66.4667, GNorm = 1.1520, lr_0 = 3.0243e-04
Loss = 5.6976e-02, PNorm = 66.4753, GNorm = 0.4787, lr_0 = 3.0222e-04
Loss = 5.5433e-02, PNorm = 66.4817, GNorm = 0.6069, lr_0 = 3.0202e-04
Loss = 6.0320e-02, PNorm = 66.4874, GNorm = 0.5917, lr_0 = 3.0181e-04
Loss = 6.2747e-02, PNorm = 66.4948, GNorm = 0.7258, lr_0 = 3.0160e-04
Loss = 5.9378e-02, PNorm = 66.5007, GNorm = 0.4822, lr_0 = 3.0140e-04
Loss = 6.9991e-02, PNorm = 66.5068, GNorm = 0.4692, lr_0 = 3.0119e-04
Loss = 6.2011e-02, PNorm = 66.5127, GNorm = 0.4597, lr_0 = 3.0098e-04
Loss = 5.8524e-02, PNorm = 66.5222, GNorm = 0.5499, lr_0 = 3.0078e-04
Loss = 6.3111e-02, PNorm = 66.5296, GNorm = 0.7151, lr_0 = 3.0057e-04
Loss = 5.7788e-02, PNorm = 66.5360, GNorm = 0.8192, lr_0 = 3.0036e-04
Loss = 6.0083e-02, PNorm = 66.5434, GNorm = 0.6188, lr_0 = 3.0016e-04
Loss = 5.0743e-02, PNorm = 66.5484, GNorm = 0.7925, lr_0 = 2.9995e-04
Loss = 6.7956e-02, PNorm = 66.5537, GNorm = 0.7451, lr_0 = 2.9975e-04
Loss = 6.1598e-02, PNorm = 66.5577, GNorm = 1.0313, lr_0 = 2.9954e-04
Loss = 6.2081e-02, PNorm = 66.5633, GNorm = 0.8630, lr_0 = 2.9934e-04
Loss = 6.7250e-02, PNorm = 66.5696, GNorm = 0.9195, lr_0 = 2.9913e-04
Loss = 6.0030e-02, PNorm = 66.5741, GNorm = 0.5189, lr_0 = 2.9893e-04
Loss = 6.5746e-02, PNorm = 66.5772, GNorm = 0.6438, lr_0 = 2.9872e-04
Loss = 6.5801e-02, PNorm = 66.5800, GNorm = 1.0486, lr_0 = 2.9852e-04
Loss = 6.1675e-02, PNorm = 66.5833, GNorm = 0.5338, lr_0 = 2.9831e-04
Loss = 5.8197e-02, PNorm = 66.5882, GNorm = 0.5108, lr_0 = 2.9811e-04
Loss = 6.4449e-02, PNorm = 66.5941, GNorm = 0.5725, lr_0 = 2.9790e-04
Loss = 5.8980e-02, PNorm = 66.6022, GNorm = 0.4075, lr_0 = 2.9770e-04
Loss = 6.7600e-02, PNorm = 66.6041, GNorm = 0.5421, lr_0 = 2.9750e-04
Loss = 5.7888e-02, PNorm = 66.6071, GNorm = 0.4935, lr_0 = 2.9729e-04
Loss = 5.9390e-02, PNorm = 66.6106, GNorm = 0.8109, lr_0 = 2.9709e-04
Loss = 6.7192e-02, PNorm = 66.6167, GNorm = 0.6411, lr_0 = 2.9689e-04
Loss = 7.5355e-02, PNorm = 66.6224, GNorm = 0.6579, lr_0 = 2.9668e-04
Loss = 6.6405e-02, PNorm = 66.6266, GNorm = 0.4525, lr_0 = 2.9648e-04
Loss = 4.9656e-02, PNorm = 66.6312, GNorm = 0.5166, lr_0 = 2.9628e-04
Loss = 6.4667e-02, PNorm = 66.6353, GNorm = 0.6607, lr_0 = 2.9607e-04
Loss = 6.4669e-02, PNorm = 66.6398, GNorm = 0.6440, lr_0 = 2.9587e-04
Loss = 5.9647e-02, PNorm = 66.6448, GNorm = 0.6429, lr_0 = 2.9567e-04
Loss = 6.0822e-02, PNorm = 66.6496, GNorm = 0.5214, lr_0 = 2.9546e-04
Loss = 5.4537e-02, PNorm = 66.6544, GNorm = 0.5269, lr_0 = 2.9526e-04
Loss = 5.7948e-02, PNorm = 66.6596, GNorm = 0.7564, lr_0 = 2.9506e-04
Loss = 6.2423e-02, PNorm = 66.6653, GNorm = 0.9231, lr_0 = 2.9486e-04
Loss = 6.2475e-02, PNorm = 66.6707, GNorm = 0.6743, lr_0 = 2.9466e-04
Loss = 6.4833e-02, PNorm = 66.6756, GNorm = 0.9103, lr_0 = 2.9445e-04
Loss = 6.2444e-02, PNorm = 66.6803, GNorm = 0.8920, lr_0 = 2.9425e-04
Loss = 5.8390e-02, PNorm = 66.6866, GNorm = 0.6344, lr_0 = 2.9405e-04
Loss = 7.0468e-02, PNorm = 66.6929, GNorm = 0.6308, lr_0 = 2.9385e-04
Loss = 6.4598e-02, PNorm = 66.7001, GNorm = 0.8400, lr_0 = 2.9365e-04
Loss = 6.9480e-02, PNorm = 66.7079, GNorm = 0.4630, lr_0 = 2.9345e-04
Loss = 6.2992e-02, PNorm = 66.7120, GNorm = 0.4760, lr_0 = 2.9325e-04
Loss = 6.2192e-02, PNorm = 66.7178, GNorm = 0.8533, lr_0 = 2.9305e-04
Loss = 6.4827e-02, PNorm = 66.7247, GNorm = 0.7077, lr_0 = 2.9284e-04
Loss = 5.2245e-02, PNorm = 66.7305, GNorm = 0.5490, lr_0 = 2.9264e-04
Loss = 6.4086e-02, PNorm = 66.7369, GNorm = 0.8855, lr_0 = 2.9244e-04
Loss = 6.5163e-02, PNorm = 66.7452, GNorm = 0.5947, lr_0 = 2.9224e-04
Loss = 6.3517e-02, PNorm = 66.7518, GNorm = 0.5339, lr_0 = 2.9204e-04
Loss = 5.8466e-02, PNorm = 66.7551, GNorm = 0.4305, lr_0 = 2.9184e-04
Loss = 5.6722e-02, PNorm = 66.7566, GNorm = 0.8866, lr_0 = 2.9164e-04
Loss = 6.4566e-02, PNorm = 66.7583, GNorm = 0.5001, lr_0 = 2.9144e-04
Loss = 6.7979e-02, PNorm = 66.7627, GNorm = 0.6567, lr_0 = 2.9124e-04
Validation mae = 0.388186
Epoch 17
Loss = 5.4802e-02, PNorm = 66.7667, GNorm = 0.7024, lr_0 = 2.9104e-04
Loss = 4.9138e-02, PNorm = 66.7730, GNorm = 0.6708, lr_0 = 2.9084e-04
Loss = 5.5385e-02, PNorm = 66.7788, GNorm = 0.6399, lr_0 = 2.9065e-04
Loss = 6.4989e-02, PNorm = 66.7858, GNorm = 0.9522, lr_0 = 2.9045e-04
Loss = 4.8106e-02, PNorm = 66.7920, GNorm = 0.5514, lr_0 = 2.9025e-04
Loss = 6.3498e-02, PNorm = 66.7957, GNorm = 0.3327, lr_0 = 2.9005e-04
Loss = 5.8249e-02, PNorm = 66.8016, GNorm = 0.4262, lr_0 = 2.8985e-04
Loss = 5.4024e-02, PNorm = 66.8092, GNorm = 0.3872, lr_0 = 2.8965e-04
Loss = 4.6348e-02, PNorm = 66.8130, GNorm = 0.6128, lr_0 = 2.8945e-04
Loss = 5.6029e-02, PNorm = 66.8183, GNorm = 0.8278, lr_0 = 2.8925e-04
Loss = 5.3556e-02, PNorm = 66.8230, GNorm = 0.5850, lr_0 = 2.8906e-04
Loss = 6.1395e-02, PNorm = 66.8311, GNorm = 0.5530, lr_0 = 2.8886e-04
Loss = 5.0064e-02, PNorm = 66.8378, GNorm = 0.3748, lr_0 = 2.8866e-04
Loss = 5.5352e-02, PNorm = 66.8426, GNorm = 0.4882, lr_0 = 2.8846e-04
Loss = 5.3278e-02, PNorm = 66.8483, GNorm = 0.4233, lr_0 = 2.8826e-04
Loss = 4.4888e-02, PNorm = 66.8513, GNorm = 0.5410, lr_0 = 2.8807e-04
Loss = 5.7797e-02, PNorm = 66.8541, GNorm = 0.6997, lr_0 = 2.8787e-04
Loss = 5.0230e-02, PNorm = 66.8593, GNorm = 0.5265, lr_0 = 2.8767e-04
Loss = 6.0789e-02, PNorm = 66.8668, GNorm = 0.4013, lr_0 = 2.8748e-04
Loss = 5.2246e-02, PNorm = 66.8727, GNorm = 0.5917, lr_0 = 2.8728e-04
Loss = 5.9382e-02, PNorm = 66.8806, GNorm = 0.7769, lr_0 = 2.8708e-04
Loss = 4.9132e-02, PNorm = 66.8874, GNorm = 0.5255, lr_0 = 2.8689e-04
Loss = 5.4155e-02, PNorm = 66.8931, GNorm = 0.6109, lr_0 = 2.8669e-04
Loss = 5.2471e-02, PNorm = 66.8991, GNorm = 0.5896, lr_0 = 2.8649e-04
Loss = 5.6582e-02, PNorm = 66.9038, GNorm = 0.3882, lr_0 = 2.8630e-04
Loss = 6.4073e-02, PNorm = 66.9079, GNorm = 0.5488, lr_0 = 2.8610e-04
Loss = 5.7620e-02, PNorm = 66.9136, GNorm = 0.6723, lr_0 = 2.8590e-04
Loss = 5.5296e-02, PNorm = 66.9177, GNorm = 1.0084, lr_0 = 2.8571e-04
Loss = 5.4498e-02, PNorm = 66.9210, GNorm = 0.8890, lr_0 = 2.8551e-04
Loss = 6.5946e-02, PNorm = 66.9252, GNorm = 0.6506, lr_0 = 2.8532e-04
Loss = 6.5760e-02, PNorm = 66.9303, GNorm = 0.5082, lr_0 = 2.8512e-04
Loss = 5.9974e-02, PNorm = 66.9367, GNorm = 0.7519, lr_0 = 2.8493e-04
Loss = 6.4041e-02, PNorm = 66.9436, GNorm = 0.5490, lr_0 = 2.8473e-04
Loss = 5.9731e-02, PNorm = 66.9511, GNorm = 0.5112, lr_0 = 2.8454e-04
Loss = 6.5232e-02, PNorm = 66.9564, GNorm = 0.7897, lr_0 = 2.8434e-04
Loss = 5.6576e-02, PNorm = 66.9619, GNorm = 0.5878, lr_0 = 2.8415e-04
Loss = 5.3576e-02, PNorm = 66.9658, GNorm = 0.6550, lr_0 = 2.8395e-04
Loss = 5.9736e-02, PNorm = 66.9677, GNorm = 0.5824, lr_0 = 2.8376e-04
Loss = 5.8428e-02, PNorm = 66.9732, GNorm = 1.0545, lr_0 = 2.8356e-04
Loss = 7.5336e-02, PNorm = 66.9811, GNorm = 0.7075, lr_0 = 2.8337e-04
Loss = 5.1504e-02, PNorm = 66.9890, GNorm = 1.1708, lr_0 = 2.8317e-04
Loss = 6.3070e-02, PNorm = 66.9960, GNorm = 0.5638, lr_0 = 2.8298e-04
Loss = 7.6141e-02, PNorm = 67.0028, GNorm = 0.6224, lr_0 = 2.8279e-04
Loss = 6.0369e-02, PNorm = 67.0083, GNorm = 0.5600, lr_0 = 2.8259e-04
Loss = 5.9603e-02, PNorm = 67.0143, GNorm = 0.5066, lr_0 = 2.8240e-04
Loss = 6.0722e-02, PNorm = 67.0212, GNorm = 0.7514, lr_0 = 2.8221e-04
Loss = 5.8682e-02, PNorm = 67.0266, GNorm = 0.6200, lr_0 = 2.8201e-04
Loss = 5.7459e-02, PNorm = 67.0307, GNorm = 0.7231, lr_0 = 2.8182e-04
Loss = 5.7375e-02, PNorm = 67.0377, GNorm = 0.6190, lr_0 = 2.8163e-04
Loss = 5.7862e-02, PNorm = 67.0422, GNorm = 0.8177, lr_0 = 2.8143e-04
Loss = 5.6724e-02, PNorm = 67.0504, GNorm = 0.6569, lr_0 = 2.8124e-04
Loss = 5.2885e-02, PNorm = 67.0581, GNorm = 0.7891, lr_0 = 2.8105e-04
Loss = 5.9998e-02, PNorm = 67.0624, GNorm = 0.6722, lr_0 = 2.8085e-04
Loss = 5.6691e-02, PNorm = 67.0668, GNorm = 0.6549, lr_0 = 2.8066e-04
Loss = 6.0182e-02, PNorm = 67.0722, GNorm = 0.6229, lr_0 = 2.8047e-04
Loss = 5.1023e-02, PNorm = 67.0794, GNorm = 0.4512, lr_0 = 2.8028e-04
Loss = 6.2592e-02, PNorm = 67.0858, GNorm = 0.7881, lr_0 = 2.8009e-04
Loss = 5.6869e-02, PNorm = 67.0900, GNorm = 0.5831, lr_0 = 2.7989e-04
Loss = 5.1911e-02, PNorm = 67.0955, GNorm = 0.5396, lr_0 = 2.7970e-04
Loss = 5.1439e-02, PNorm = 67.0995, GNorm = 0.6639, lr_0 = 2.7951e-04
Loss = 6.9185e-02, PNorm = 67.1045, GNorm = 0.6779, lr_0 = 2.7932e-04
Loss = 5.7997e-02, PNorm = 67.1094, GNorm = 0.6745, lr_0 = 2.7913e-04
Loss = 6.5359e-02, PNorm = 67.1123, GNorm = 0.7837, lr_0 = 2.7894e-04
Loss = 5.2900e-02, PNorm = 67.1162, GNorm = 0.4348, lr_0 = 2.7875e-04
Loss = 5.6493e-02, PNorm = 67.1218, GNorm = 0.6984, lr_0 = 2.7855e-04
Loss = 5.7329e-02, PNorm = 67.1285, GNorm = 0.5721, lr_0 = 2.7836e-04
Loss = 5.1746e-02, PNorm = 67.1335, GNorm = 0.7819, lr_0 = 2.7817e-04
Loss = 5.3016e-02, PNorm = 67.1388, GNorm = 0.6136, lr_0 = 2.7798e-04
Loss = 4.7371e-02, PNorm = 67.1449, GNorm = 0.5687, lr_0 = 2.7779e-04
Loss = 5.2299e-02, PNorm = 67.1483, GNorm = 0.6326, lr_0 = 2.7760e-04
Loss = 6.0735e-02, PNorm = 67.1530, GNorm = 0.7203, lr_0 = 2.7741e-04
Loss = 5.8397e-02, PNorm = 67.1574, GNorm = 0.4440, lr_0 = 2.7722e-04
Loss = 5.7769e-02, PNorm = 67.1605, GNorm = 0.5748, lr_0 = 2.7703e-04
Loss = 5.2994e-02, PNorm = 67.1651, GNorm = 0.5105, lr_0 = 2.7684e-04
Loss = 5.5129e-02, PNorm = 67.1702, GNorm = 0.4411, lr_0 = 2.7665e-04
Loss = 5.5641e-02, PNorm = 67.1747, GNorm = 0.8065, lr_0 = 2.7646e-04
Loss = 5.1169e-02, PNorm = 67.1791, GNorm = 0.5521, lr_0 = 2.7627e-04
Loss = 5.6722e-02, PNorm = 67.1844, GNorm = 0.6098, lr_0 = 2.7608e-04
Loss = 5.5095e-02, PNorm = 67.1896, GNorm = 0.6352, lr_0 = 2.7590e-04
Loss = 5.1523e-02, PNorm = 67.1945, GNorm = 0.5485, lr_0 = 2.7571e-04
Loss = 5.5059e-02, PNorm = 67.1982, GNorm = 0.5709, lr_0 = 2.7552e-04
Loss = 5.8035e-02, PNorm = 67.2015, GNorm = 0.5660, lr_0 = 2.7533e-04
Loss = 5.2128e-02, PNorm = 67.2061, GNorm = 0.6035, lr_0 = 2.7514e-04
Loss = 6.1071e-02, PNorm = 67.2160, GNorm = 0.4817, lr_0 = 2.7495e-04
Loss = 5.6825e-02, PNorm = 67.2213, GNorm = 0.5637, lr_0 = 2.7476e-04
Loss = 5.9180e-02, PNorm = 67.2249, GNorm = 0.5443, lr_0 = 2.7457e-04
Loss = 4.6355e-02, PNorm = 67.2270, GNorm = 0.4879, lr_0 = 2.7439e-04
Loss = 4.9361e-02, PNorm = 67.2279, GNorm = 0.4687, lr_0 = 2.7420e-04
Loss = 5.0372e-02, PNorm = 67.2313, GNorm = 0.4447, lr_0 = 2.7401e-04
Loss = 5.2046e-02, PNorm = 67.2364, GNorm = 0.5211, lr_0 = 2.7382e-04
Loss = 6.3746e-02, PNorm = 67.2419, GNorm = 0.6146, lr_0 = 2.7364e-04
Loss = 6.3241e-02, PNorm = 67.2476, GNorm = 0.8865, lr_0 = 2.7345e-04
Loss = 5.9621e-02, PNorm = 67.2530, GNorm = 0.8527, lr_0 = 2.7326e-04
Loss = 5.4072e-02, PNorm = 67.2577, GNorm = 0.4589, lr_0 = 2.7307e-04
Loss = 5.6859e-02, PNorm = 67.2612, GNorm = 0.6072, lr_0 = 2.7289e-04
Loss = 6.2049e-02, PNorm = 67.2677, GNorm = 0.5697, lr_0 = 2.7270e-04
Loss = 5.3710e-02, PNorm = 67.2716, GNorm = 0.4498, lr_0 = 2.7251e-04
Loss = 5.8125e-02, PNorm = 67.2749, GNorm = 0.3727, lr_0 = 2.7233e-04
Loss = 6.0776e-02, PNorm = 67.2793, GNorm = 0.9714, lr_0 = 2.7214e-04
Loss = 5.9965e-02, PNorm = 67.2814, GNorm = 0.7694, lr_0 = 2.7195e-04
Loss = 5.7372e-02, PNorm = 67.2842, GNorm = 0.4367, lr_0 = 2.7177e-04
Loss = 6.1418e-02, PNorm = 67.2873, GNorm = 0.4195, lr_0 = 2.7158e-04
Loss = 6.2217e-02, PNorm = 67.2891, GNorm = 0.4638, lr_0 = 2.7139e-04
Loss = 5.8832e-02, PNorm = 67.2925, GNorm = 0.5135, lr_0 = 2.7121e-04
Loss = 5.8827e-02, PNorm = 67.2974, GNorm = 0.7514, lr_0 = 2.7102e-04
Loss = 5.2849e-02, PNorm = 67.3013, GNorm = 0.4589, lr_0 = 2.7084e-04
Loss = 4.9111e-02, PNorm = 67.3055, GNorm = 0.5369, lr_0 = 2.7065e-04
Loss = 5.7562e-02, PNorm = 67.3093, GNorm = 0.6006, lr_0 = 2.7047e-04
Loss = 6.5243e-02, PNorm = 67.3138, GNorm = 1.2527, lr_0 = 2.7028e-04
Loss = 5.3986e-02, PNorm = 67.3168, GNorm = 0.5485, lr_0 = 2.7010e-04
Loss = 6.2097e-02, PNorm = 67.3212, GNorm = 0.8637, lr_0 = 2.6991e-04
Loss = 6.0747e-02, PNorm = 67.3283, GNorm = 0.5010, lr_0 = 2.6973e-04
Loss = 5.4702e-02, PNorm = 67.3336, GNorm = 0.4997, lr_0 = 2.6954e-04
Loss = 5.5810e-02, PNorm = 67.3389, GNorm = 0.6080, lr_0 = 2.6936e-04
Loss = 6.4751e-02, PNorm = 67.3447, GNorm = 0.6758, lr_0 = 2.6917e-04
Loss = 5.7862e-02, PNorm = 67.3505, GNorm = 0.5850, lr_0 = 2.6899e-04
Loss = 6.8078e-02, PNorm = 67.3550, GNorm = 0.5386, lr_0 = 2.6880e-04
Loss = 6.0469e-02, PNorm = 67.3616, GNorm = 0.8451, lr_0 = 2.6862e-04
Loss = 5.7266e-02, PNorm = 67.3670, GNorm = 0.6129, lr_0 = 2.6844e-04
Loss = 5.7596e-02, PNorm = 67.3737, GNorm = 0.6083, lr_0 = 2.6825e-04
Validation mae = 0.384467
Epoch 18
Loss = 4.5505e-02, PNorm = 67.3793, GNorm = 0.4715, lr_0 = 2.6807e-04
Loss = 4.8975e-02, PNorm = 67.3843, GNorm = 0.6865, lr_0 = 2.6788e-04
Loss = 4.8175e-02, PNorm = 67.3864, GNorm = 0.4285, lr_0 = 2.6770e-04
Loss = 4.8470e-02, PNorm = 67.3908, GNorm = 0.4927, lr_0 = 2.6752e-04
Loss = 4.4317e-02, PNorm = 67.3950, GNorm = 0.5275, lr_0 = 2.6733e-04
Loss = 4.5453e-02, PNorm = 67.4022, GNorm = 0.4255, lr_0 = 2.6715e-04
Loss = 4.5223e-02, PNorm = 67.4093, GNorm = 0.7195, lr_0 = 2.6697e-04
Loss = 4.1617e-02, PNorm = 67.4140, GNorm = 0.6817, lr_0 = 2.6678e-04
Loss = 4.9835e-02, PNorm = 67.4153, GNorm = 0.4570, lr_0 = 2.6660e-04
Loss = 4.8896e-02, PNorm = 67.4174, GNorm = 0.4859, lr_0 = 2.6642e-04
Loss = 5.0608e-02, PNorm = 67.4227, GNorm = 0.6616, lr_0 = 2.6624e-04
Loss = 5.5623e-02, PNorm = 67.4304, GNorm = 0.5391, lr_0 = 2.6605e-04
Loss = 4.8728e-02, PNorm = 67.4363, GNorm = 0.5382, lr_0 = 2.6587e-04
Loss = 5.9046e-02, PNorm = 67.4420, GNorm = 0.7249, lr_0 = 2.6569e-04
Loss = 5.5677e-02, PNorm = 67.4484, GNorm = 0.4100, lr_0 = 2.6551e-04
Loss = 4.5122e-02, PNorm = 67.4538, GNorm = 0.4146, lr_0 = 2.6533e-04
Loss = 4.6069e-02, PNorm = 67.4582, GNorm = 0.4874, lr_0 = 2.6514e-04
Loss = 5.1566e-02, PNorm = 67.4603, GNorm = 0.6986, lr_0 = 2.6496e-04
Loss = 5.0680e-02, PNorm = 67.4636, GNorm = 0.5011, lr_0 = 2.6478e-04
Loss = 4.9377e-02, PNorm = 67.4687, GNorm = 0.5484, lr_0 = 2.6460e-04
Loss = 5.2920e-02, PNorm = 67.4754, GNorm = 0.6355, lr_0 = 2.6442e-04
Loss = 5.2567e-02, PNorm = 67.4796, GNorm = 0.4854, lr_0 = 2.6424e-04
Loss = 5.1449e-02, PNorm = 67.4848, GNorm = 0.4928, lr_0 = 2.6406e-04
Loss = 5.3442e-02, PNorm = 67.4898, GNorm = 0.6602, lr_0 = 2.6388e-04
Loss = 5.0227e-02, PNorm = 67.4943, GNorm = 0.4851, lr_0 = 2.6369e-04
Loss = 4.4927e-02, PNorm = 67.4995, GNorm = 0.4578, lr_0 = 2.6351e-04
Loss = 5.1906e-02, PNorm = 67.5026, GNorm = 0.5704, lr_0 = 2.6333e-04
Loss = 4.8252e-02, PNorm = 67.5080, GNorm = 0.5793, lr_0 = 2.6315e-04
Loss = 4.1596e-02, PNorm = 67.5125, GNorm = 0.4745, lr_0 = 2.6297e-04
Loss = 4.2722e-02, PNorm = 67.5176, GNorm = 0.6523, lr_0 = 2.6279e-04
Loss = 5.5387e-02, PNorm = 67.5220, GNorm = 0.5719, lr_0 = 2.6261e-04
Loss = 4.9097e-02, PNorm = 67.5271, GNorm = 0.5488, lr_0 = 2.6243e-04
Loss = 6.0933e-02, PNorm = 67.5303, GNorm = 1.0331, lr_0 = 2.6225e-04
Loss = 4.5521e-02, PNorm = 67.5338, GNorm = 0.4396, lr_0 = 2.6207e-04
Loss = 4.6910e-02, PNorm = 67.5383, GNorm = 0.6678, lr_0 = 2.6189e-04
Loss = 4.4962e-02, PNorm = 67.5413, GNorm = 0.4273, lr_0 = 2.6171e-04
Loss = 5.2154e-02, PNorm = 67.5451, GNorm = 0.4591, lr_0 = 2.6153e-04
Loss = 5.9062e-02, PNorm = 67.5498, GNorm = 0.5091, lr_0 = 2.6136e-04
Loss = 4.8781e-02, PNorm = 67.5541, GNorm = 0.5827, lr_0 = 2.6118e-04
Loss = 5.2590e-02, PNorm = 67.5574, GNorm = 0.7147, lr_0 = 2.6100e-04
Loss = 5.0562e-02, PNorm = 67.5612, GNorm = 0.7343, lr_0 = 2.6082e-04
Loss = 4.6761e-02, PNorm = 67.5662, GNorm = 0.5749, lr_0 = 2.6064e-04
Loss = 5.9532e-02, PNorm = 67.5711, GNorm = 0.7047, lr_0 = 2.6046e-04
Loss = 6.6243e-02, PNorm = 67.5782, GNorm = 0.4765, lr_0 = 2.6028e-04
Loss = 5.3163e-02, PNorm = 67.5866, GNorm = 0.6274, lr_0 = 2.6011e-04
Loss = 4.9659e-02, PNorm = 67.5913, GNorm = 0.6155, lr_0 = 2.5993e-04
Loss = 5.3479e-02, PNorm = 67.5956, GNorm = 0.4521, lr_0 = 2.5975e-04
Loss = 5.2972e-02, PNorm = 67.6010, GNorm = 0.5690, lr_0 = 2.5957e-04
Loss = 5.6859e-02, PNorm = 67.6074, GNorm = 0.5580, lr_0 = 2.5939e-04
Loss = 5.2808e-02, PNorm = 67.6103, GNorm = 0.5125, lr_0 = 2.5922e-04
Loss = 5.6044e-02, PNorm = 67.6141, GNorm = 0.8242, lr_0 = 2.5904e-04
Loss = 5.1600e-02, PNorm = 67.6185, GNorm = 0.3762, lr_0 = 2.5886e-04
Loss = 4.7948e-02, PNorm = 67.6238, GNorm = 0.5659, lr_0 = 2.5868e-04
Loss = 6.0386e-02, PNorm = 67.6276, GNorm = 0.6235, lr_0 = 2.5851e-04
Loss = 5.6848e-02, PNorm = 67.6317, GNorm = 0.6487, lr_0 = 2.5833e-04
Loss = 6.0805e-02, PNorm = 67.6359, GNorm = 0.5931, lr_0 = 2.5815e-04
Loss = 5.5057e-02, PNorm = 67.6397, GNorm = 0.5557, lr_0 = 2.5797e-04
Loss = 6.0970e-02, PNorm = 67.6440, GNorm = 0.6278, lr_0 = 2.5780e-04
Loss = 5.9374e-02, PNorm = 67.6486, GNorm = 0.7462, lr_0 = 2.5762e-04
Loss = 5.4409e-02, PNorm = 67.6525, GNorm = 0.5973, lr_0 = 2.5745e-04
Loss = 5.7441e-02, PNorm = 67.6580, GNorm = 0.7925, lr_0 = 2.5727e-04
Loss = 5.0632e-02, PNorm = 67.6626, GNorm = 0.7405, lr_0 = 2.5709e-04
Loss = 5.3182e-02, PNorm = 67.6695, GNorm = 0.4258, lr_0 = 2.5692e-04
Loss = 5.0593e-02, PNorm = 67.6756, GNorm = 0.6573, lr_0 = 2.5674e-04
Loss = 5.4292e-02, PNorm = 67.6805, GNorm = 0.5414, lr_0 = 2.5656e-04
Loss = 6.1549e-02, PNorm = 67.6842, GNorm = 0.6682, lr_0 = 2.5639e-04
Loss = 5.5185e-02, PNorm = 67.6872, GNorm = 0.8541, lr_0 = 2.5621e-04
Loss = 5.1343e-02, PNorm = 67.6911, GNorm = 0.5541, lr_0 = 2.5604e-04
Loss = 5.5776e-02, PNorm = 67.6969, GNorm = 0.6036, lr_0 = 2.5586e-04
Loss = 5.6950e-02, PNorm = 67.7025, GNorm = 0.6334, lr_0 = 2.5569e-04
Loss = 5.4134e-02, PNorm = 67.7087, GNorm = 0.3725, lr_0 = 2.5551e-04
Loss = 5.5586e-02, PNorm = 67.7142, GNorm = 0.9356, lr_0 = 2.5534e-04
Loss = 5.4665e-02, PNorm = 67.7184, GNorm = 0.6988, lr_0 = 2.5516e-04
Loss = 6.1457e-02, PNorm = 67.7219, GNorm = 1.3218, lr_0 = 2.5499e-04
Loss = 5.4583e-02, PNorm = 67.7284, GNorm = 0.4728, lr_0 = 2.5481e-04
Loss = 5.0017e-02, PNorm = 67.7327, GNorm = 0.6562, lr_0 = 2.5464e-04
Loss = 6.2928e-02, PNorm = 67.7366, GNorm = 0.4461, lr_0 = 2.5446e-04
Loss = 5.1201e-02, PNorm = 67.7398, GNorm = 0.5827, lr_0 = 2.5429e-04
Loss = 5.3944e-02, PNorm = 67.7440, GNorm = 0.5580, lr_0 = 2.5411e-04
Loss = 5.6235e-02, PNorm = 67.7484, GNorm = 0.4798, lr_0 = 2.5394e-04
Loss = 5.3587e-02, PNorm = 67.7508, GNorm = 0.3911, lr_0 = 2.5377e-04
Loss = 4.8100e-02, PNorm = 67.7546, GNorm = 0.5877, lr_0 = 2.5359e-04
Loss = 4.8427e-02, PNorm = 67.7584, GNorm = 0.5242, lr_0 = 2.5342e-04
Loss = 5.3870e-02, PNorm = 67.7623, GNorm = 0.4028, lr_0 = 2.5325e-04
Loss = 5.1736e-02, PNorm = 67.7671, GNorm = 0.4721, lr_0 = 2.5307e-04
Loss = 5.7861e-02, PNorm = 67.7729, GNorm = 0.5190, lr_0 = 2.5290e-04
Loss = 5.3586e-02, PNorm = 67.7787, GNorm = 0.5062, lr_0 = 2.5273e-04
Loss = 5.1067e-02, PNorm = 67.7837, GNorm = 0.4531, lr_0 = 2.5255e-04
Loss = 5.4534e-02, PNorm = 67.7895, GNorm = 0.5416, lr_0 = 2.5238e-04
Loss = 5.3502e-02, PNorm = 67.7967, GNorm = 0.4459, lr_0 = 2.5221e-04
Loss = 4.4682e-02, PNorm = 67.7998, GNorm = 0.4373, lr_0 = 2.5203e-04
Loss = 5.2769e-02, PNorm = 67.8019, GNorm = 0.4799, lr_0 = 2.5186e-04
Loss = 5.6062e-02, PNorm = 67.8056, GNorm = 0.7108, lr_0 = 2.5169e-04
Loss = 4.9737e-02, PNorm = 67.8091, GNorm = 0.5237, lr_0 = 2.5152e-04
Loss = 6.0480e-02, PNorm = 67.8133, GNorm = 0.6195, lr_0 = 2.5134e-04
Loss = 5.6588e-02, PNorm = 67.8195, GNorm = 0.4067, lr_0 = 2.5117e-04
Loss = 6.5840e-02, PNorm = 67.8248, GNorm = 0.8043, lr_0 = 2.5100e-04
Loss = 5.3952e-02, PNorm = 67.8297, GNorm = 0.4463, lr_0 = 2.5083e-04
Loss = 6.5464e-02, PNorm = 67.8337, GNorm = 0.5385, lr_0 = 2.5066e-04
Loss = 6.2630e-02, PNorm = 67.8393, GNorm = 0.5942, lr_0 = 2.5048e-04
Loss = 4.9111e-02, PNorm = 67.8431, GNorm = 0.5146, lr_0 = 2.5031e-04
Loss = 5.3428e-02, PNorm = 67.8468, GNorm = 0.6173, lr_0 = 2.5014e-04
Loss = 5.7770e-02, PNorm = 67.8529, GNorm = 0.7682, lr_0 = 2.4997e-04
Loss = 5.1054e-02, PNorm = 67.8571, GNorm = 0.7215, lr_0 = 2.4980e-04
Loss = 5.6010e-02, PNorm = 67.8591, GNorm = 0.5351, lr_0 = 2.4963e-04
Loss = 5.7481e-02, PNorm = 67.8618, GNorm = 0.5286, lr_0 = 2.4946e-04
Loss = 5.2504e-02, PNorm = 67.8655, GNorm = 0.6799, lr_0 = 2.4929e-04
Loss = 6.0686e-02, PNorm = 67.8696, GNorm = 0.4409, lr_0 = 2.4911e-04
Loss = 5.2143e-02, PNorm = 67.8755, GNorm = 0.5796, lr_0 = 2.4894e-04
Loss = 7.1990e-02, PNorm = 67.8799, GNorm = 0.5116, lr_0 = 2.4877e-04
Loss = 6.4259e-02, PNorm = 67.8860, GNorm = 0.8191, lr_0 = 2.4860e-04
Loss = 6.0520e-02, PNorm = 67.8917, GNorm = 0.7325, lr_0 = 2.4843e-04
Loss = 5.4771e-02, PNorm = 67.8973, GNorm = 0.6040, lr_0 = 2.4826e-04
Loss = 6.0451e-02, PNorm = 67.9022, GNorm = 0.7044, lr_0 = 2.4809e-04
Loss = 5.8121e-02, PNorm = 67.9061, GNorm = 0.4795, lr_0 = 2.4792e-04
Loss = 5.5988e-02, PNorm = 67.9104, GNorm = 0.5754, lr_0 = 2.4775e-04
Loss = 5.4200e-02, PNorm = 67.9151, GNorm = 0.5494, lr_0 = 2.4758e-04
Loss = 4.8659e-02, PNorm = 67.9208, GNorm = 0.5724, lr_0 = 2.4741e-04
Loss = 5.5362e-02, PNorm = 67.9255, GNorm = 0.5217, lr_0 = 2.4724e-04
Loss = 5.4800e-02, PNorm = 67.9296, GNorm = 0.7133, lr_0 = 2.4707e-04
Validation mae = 0.386898
Epoch 19
Loss = 4.9461e-02, PNorm = 67.9343, GNorm = 1.2714, lr_0 = 2.4690e-04
Loss = 5.3304e-02, PNorm = 67.9395, GNorm = 0.5062, lr_0 = 2.4674e-04
Loss = 4.5883e-02, PNorm = 67.9454, GNorm = 0.4036, lr_0 = 2.4657e-04
Loss = 4.7723e-02, PNorm = 67.9499, GNorm = 0.3623, lr_0 = 2.4640e-04
Loss = 4.9619e-02, PNorm = 67.9515, GNorm = 0.4440, lr_0 = 2.4623e-04
Loss = 4.8434e-02, PNorm = 67.9545, GNorm = 0.4391, lr_0 = 2.4606e-04
Loss = 5.7693e-02, PNorm = 67.9593, GNorm = 0.6944, lr_0 = 2.4589e-04
Loss = 4.1688e-02, PNorm = 67.9660, GNorm = 0.4615, lr_0 = 2.4572e-04
Loss = 4.6461e-02, PNorm = 67.9719, GNorm = 0.4118, lr_0 = 2.4556e-04
Loss = 4.2172e-02, PNorm = 67.9768, GNorm = 0.3614, lr_0 = 2.4539e-04
Loss = 4.3360e-02, PNorm = 67.9802, GNorm = 0.5140, lr_0 = 2.4522e-04
Loss = 4.5687e-02, PNorm = 67.9815, GNorm = 0.4218, lr_0 = 2.4505e-04
Loss = 4.8278e-02, PNorm = 67.9859, GNorm = 0.7510, lr_0 = 2.4488e-04
Loss = 5.4014e-02, PNorm = 67.9921, GNorm = 0.8157, lr_0 = 2.4472e-04
Loss = 4.9197e-02, PNorm = 67.9983, GNorm = 0.5145, lr_0 = 2.4455e-04
Loss = 4.8481e-02, PNorm = 68.0049, GNorm = 0.6026, lr_0 = 2.4438e-04
Loss = 4.8131e-02, PNorm = 68.0089, GNorm = 0.6077, lr_0 = 2.4421e-04
Loss = 5.5633e-02, PNorm = 68.0146, GNorm = 0.5264, lr_0 = 2.4405e-04
Loss = 4.5957e-02, PNorm = 68.0226, GNorm = 0.6013, lr_0 = 2.4388e-04
Loss = 4.3299e-02, PNorm = 68.0286, GNorm = 0.4962, lr_0 = 2.4371e-04
Loss = 4.0711e-02, PNorm = 68.0344, GNorm = 0.3337, lr_0 = 2.4354e-04
Loss = 5.1766e-02, PNorm = 68.0376, GNorm = 0.5291, lr_0 = 2.4338e-04
Loss = 4.4755e-02, PNorm = 68.0408, GNorm = 0.6375, lr_0 = 2.4321e-04
Loss = 5.3978e-02, PNorm = 68.0443, GNorm = 0.5778, lr_0 = 2.4304e-04
Loss = 5.3441e-02, PNorm = 68.0481, GNorm = 0.7708, lr_0 = 2.4288e-04
Loss = 5.0931e-02, PNorm = 68.0510, GNorm = 0.6065, lr_0 = 2.4271e-04
Loss = 4.3665e-02, PNorm = 68.0546, GNorm = 0.5294, lr_0 = 2.4254e-04
Loss = 4.5411e-02, PNorm = 68.0592, GNorm = 0.6525, lr_0 = 2.4238e-04
Loss = 5.2314e-02, PNorm = 68.0631, GNorm = 0.4416, lr_0 = 2.4221e-04
Loss = 5.5883e-02, PNorm = 68.0679, GNorm = 0.5455, lr_0 = 2.4205e-04
Loss = 5.2760e-02, PNorm = 68.0727, GNorm = 0.6018, lr_0 = 2.4188e-04
Loss = 5.2699e-02, PNorm = 68.0763, GNorm = 0.3981, lr_0 = 2.4171e-04
Loss = 4.6191e-02, PNorm = 68.0823, GNorm = 0.6527, lr_0 = 2.4155e-04
Loss = 5.4449e-02, PNorm = 68.0895, GNorm = 0.7718, lr_0 = 2.4138e-04
Loss = 4.6534e-02, PNorm = 68.0945, GNorm = 0.6938, lr_0 = 2.4122e-04
Loss = 4.3645e-02, PNorm = 68.0995, GNorm = 0.5913, lr_0 = 2.4105e-04
Loss = 5.1083e-02, PNorm = 68.1050, GNorm = 0.4062, lr_0 = 2.4089e-04
Loss = 5.2528e-02, PNorm = 68.1080, GNorm = 0.5062, lr_0 = 2.4072e-04
Loss = 4.7940e-02, PNorm = 68.1075, GNorm = 0.5542, lr_0 = 2.4056e-04
Loss = 5.7652e-02, PNorm = 68.1079, GNorm = 0.4730, lr_0 = 2.4039e-04
Loss = 4.9721e-02, PNorm = 68.1120, GNorm = 0.6927, lr_0 = 2.4023e-04
Loss = 5.4312e-02, PNorm = 68.1164, GNorm = 0.6281, lr_0 = 2.4006e-04
Loss = 5.6155e-02, PNorm = 68.1192, GNorm = 0.5991, lr_0 = 2.3990e-04
Loss = 4.6573e-02, PNorm = 68.1236, GNorm = 0.4066, lr_0 = 2.3974e-04
Loss = 4.6193e-02, PNorm = 68.1274, GNorm = 0.5743, lr_0 = 2.3957e-04
Loss = 4.9328e-02, PNorm = 68.1310, GNorm = 0.4154, lr_0 = 2.3941e-04
Loss = 4.5242e-02, PNorm = 68.1333, GNorm = 0.6974, lr_0 = 2.3924e-04
Loss = 4.6946e-02, PNorm = 68.1369, GNorm = 0.7207, lr_0 = 2.3908e-04
Loss = 5.1990e-02, PNorm = 68.1420, GNorm = 0.8096, lr_0 = 2.3892e-04
Loss = 5.2174e-02, PNorm = 68.1472, GNorm = 0.5058, lr_0 = 2.3875e-04
Loss = 5.3243e-02, PNorm = 68.1533, GNorm = 0.4149, lr_0 = 2.3859e-04
Loss = 5.3843e-02, PNorm = 68.1575, GNorm = 0.6166, lr_0 = 2.3842e-04
Loss = 5.2587e-02, PNorm = 68.1628, GNorm = 0.7955, lr_0 = 2.3826e-04
Loss = 5.0282e-02, PNorm = 68.1675, GNorm = 0.4548, lr_0 = 2.3810e-04
Loss = 5.2617e-02, PNorm = 68.1720, GNorm = 0.6519, lr_0 = 2.3794e-04
Loss = 4.6233e-02, PNorm = 68.1739, GNorm = 0.6524, lr_0 = 2.3777e-04
Loss = 5.1520e-02, PNorm = 68.1758, GNorm = 0.5652, lr_0 = 2.3761e-04
Loss = 4.2426e-02, PNorm = 68.1789, GNorm = 0.6120, lr_0 = 2.3745e-04
Loss = 5.6345e-02, PNorm = 68.1802, GNorm = 0.7753, lr_0 = 2.3728e-04
Loss = 4.9569e-02, PNorm = 68.1820, GNorm = 0.5591, lr_0 = 2.3712e-04
Loss = 4.9751e-02, PNorm = 68.1865, GNorm = 0.5759, lr_0 = 2.3696e-04
Loss = 5.4988e-02, PNorm = 68.1915, GNorm = 0.6708, lr_0 = 2.3680e-04
Loss = 4.9807e-02, PNorm = 68.1967, GNorm = 0.7351, lr_0 = 2.3663e-04
Loss = 6.0829e-02, PNorm = 68.2030, GNorm = 0.6984, lr_0 = 2.3647e-04
Loss = 5.0154e-02, PNorm = 68.2084, GNorm = 0.5019, lr_0 = 2.3631e-04
Loss = 5.4429e-02, PNorm = 68.2128, GNorm = 0.5856, lr_0 = 2.3615e-04
Loss = 5.1118e-02, PNorm = 68.2157, GNorm = 0.5392, lr_0 = 2.3599e-04
Loss = 4.9055e-02, PNorm = 68.2197, GNorm = 0.3627, lr_0 = 2.3582e-04
Loss = 5.6689e-02, PNorm = 68.2220, GNorm = 0.6161, lr_0 = 2.3566e-04
Loss = 4.9370e-02, PNorm = 68.2244, GNorm = 0.8597, lr_0 = 2.3550e-04
Loss = 5.8997e-02, PNorm = 68.2314, GNorm = 0.5079, lr_0 = 2.3534e-04
Loss = 4.9796e-02, PNorm = 68.2362, GNorm = 0.9137, lr_0 = 2.3518e-04
Loss = 5.3348e-02, PNorm = 68.2412, GNorm = 1.0258, lr_0 = 2.3502e-04
Loss = 5.4281e-02, PNorm = 68.2457, GNorm = 1.0405, lr_0 = 2.3486e-04
Loss = 4.9091e-02, PNorm = 68.2511, GNorm = 0.4988, lr_0 = 2.3470e-04
Loss = 4.9613e-02, PNorm = 68.2554, GNorm = 0.9097, lr_0 = 2.3454e-04
Loss = 4.8658e-02, PNorm = 68.2617, GNorm = 0.5145, lr_0 = 2.3437e-04
Loss = 5.2990e-02, PNorm = 68.2665, GNorm = 0.3908, lr_0 = 2.3421e-04
Loss = 5.3353e-02, PNorm = 68.2708, GNorm = 0.5420, lr_0 = 2.3405e-04
Loss = 5.4325e-02, PNorm = 68.2747, GNorm = 0.5709, lr_0 = 2.3389e-04
Loss = 5.5025e-02, PNorm = 68.2775, GNorm = 0.5688, lr_0 = 2.3373e-04
Loss = 4.6085e-02, PNorm = 68.2812, GNorm = 0.4791, lr_0 = 2.3357e-04
Loss = 5.2475e-02, PNorm = 68.2851, GNorm = 0.8767, lr_0 = 2.3341e-04
Loss = 4.9530e-02, PNorm = 68.2899, GNorm = 0.4316, lr_0 = 2.3325e-04
Loss = 5.0559e-02, PNorm = 68.2939, GNorm = 0.6341, lr_0 = 2.3309e-04
Loss = 5.2098e-02, PNorm = 68.2966, GNorm = 0.6394, lr_0 = 2.3293e-04
Loss = 4.5840e-02, PNorm = 68.2995, GNorm = 0.6947, lr_0 = 2.3277e-04
Loss = 5.2767e-02, PNorm = 68.3028, GNorm = 0.3699, lr_0 = 2.3261e-04
Loss = 4.6613e-02, PNorm = 68.3048, GNorm = 0.5369, lr_0 = 2.3246e-04
Loss = 4.9854e-02, PNorm = 68.3077, GNorm = 0.5545, lr_0 = 2.3230e-04
Loss = 5.4766e-02, PNorm = 68.3116, GNorm = 1.1189, lr_0 = 2.3214e-04
Loss = 5.2574e-02, PNorm = 68.3145, GNorm = 0.5166, lr_0 = 2.3198e-04
Loss = 5.2419e-02, PNorm = 68.3177, GNorm = 0.3979, lr_0 = 2.3182e-04
Loss = 5.7471e-02, PNorm = 68.3213, GNorm = 0.7799, lr_0 = 2.3166e-04
Loss = 4.9902e-02, PNorm = 68.3255, GNorm = 0.6907, lr_0 = 2.3150e-04
Loss = 5.1535e-02, PNorm = 68.3308, GNorm = 0.4877, lr_0 = 2.3134e-04
Loss = 5.0331e-02, PNorm = 68.3336, GNorm = 0.4925, lr_0 = 2.3118e-04
Loss = 4.5046e-02, PNorm = 68.3365, GNorm = 0.4941, lr_0 = 2.3103e-04
Loss = 5.5507e-02, PNorm = 68.3390, GNorm = 0.5476, lr_0 = 2.3087e-04
Loss = 5.0752e-02, PNorm = 68.3427, GNorm = 0.4088, lr_0 = 2.3071e-04
Loss = 4.7111e-02, PNorm = 68.3475, GNorm = 0.5196, lr_0 = 2.3055e-04
Loss = 5.3087e-02, PNorm = 68.3519, GNorm = 0.6226, lr_0 = 2.3039e-04
Loss = 4.7904e-02, PNorm = 68.3560, GNorm = 0.3870, lr_0 = 2.3024e-04
Loss = 5.6076e-02, PNorm = 68.3599, GNorm = 0.8872, lr_0 = 2.3008e-04
Loss = 4.3137e-02, PNorm = 68.3646, GNorm = 0.4704, lr_0 = 2.2992e-04
Loss = 4.6528e-02, PNorm = 68.3675, GNorm = 0.5678, lr_0 = 2.2976e-04
Loss = 6.0497e-02, PNorm = 68.3716, GNorm = 0.7926, lr_0 = 2.2961e-04
Loss = 5.4187e-02, PNorm = 68.3762, GNorm = 0.6010, lr_0 = 2.2945e-04
Loss = 5.3777e-02, PNorm = 68.3799, GNorm = 0.5003, lr_0 = 2.2929e-04
Loss = 4.8974e-02, PNorm = 68.3821, GNorm = 0.4239, lr_0 = 2.2913e-04
Loss = 4.6745e-02, PNorm = 68.3838, GNorm = 0.5480, lr_0 = 2.2898e-04
Loss = 4.8301e-02, PNorm = 68.3868, GNorm = 0.4407, lr_0 = 2.2882e-04
Loss = 5.0463e-02, PNorm = 68.3905, GNorm = 0.5697, lr_0 = 2.2866e-04
Loss = 4.7723e-02, PNorm = 68.3946, GNorm = 0.4512, lr_0 = 2.2851e-04
Loss = 5.2433e-02, PNorm = 68.4001, GNorm = 0.8156, lr_0 = 2.2835e-04
Loss = 4.5762e-02, PNorm = 68.4051, GNorm = 0.7009, lr_0 = 2.2819e-04
Loss = 4.3075e-02, PNorm = 68.4086, GNorm = 0.3972, lr_0 = 2.2804e-04
Loss = 4.9969e-02, PNorm = 68.4119, GNorm = 0.4571, lr_0 = 2.2788e-04
Loss = 4.7561e-02, PNorm = 68.4138, GNorm = 0.5465, lr_0 = 2.2773e-04
Loss = 4.9702e-02, PNorm = 68.4172, GNorm = 0.3383, lr_0 = 2.2757e-04
Validation mae = 0.384220
Epoch 20
Loss = 4.2267e-02, PNorm = 68.4220, GNorm = 0.4777, lr_0 = 2.2741e-04
Loss = 4.8399e-02, PNorm = 68.4267, GNorm = 0.8914, lr_0 = 2.2726e-04
Loss = 4.4436e-02, PNorm = 68.4321, GNorm = 0.4226, lr_0 = 2.2710e-04
Loss = 4.6926e-02, PNorm = 68.4355, GNorm = 0.5495, lr_0 = 2.2695e-04
Loss = 4.1263e-02, PNorm = 68.4388, GNorm = 0.6267, lr_0 = 2.2679e-04
Loss = 4.7769e-02, PNorm = 68.4431, GNorm = 0.6309, lr_0 = 2.2664e-04
Loss = 4.7328e-02, PNorm = 68.4479, GNorm = 0.6297, lr_0 = 2.2648e-04
Loss = 4.7324e-02, PNorm = 68.4532, GNorm = 0.5168, lr_0 = 2.2632e-04
Loss = 4.7063e-02, PNorm = 68.4588, GNorm = 0.4164, lr_0 = 2.2617e-04
Loss = 4.9861e-02, PNorm = 68.4651, GNorm = 0.5733, lr_0 = 2.2601e-04
Loss = 5.2508e-02, PNorm = 68.4684, GNorm = 0.4724, lr_0 = 2.2586e-04
Loss = 5.0128e-02, PNorm = 68.4728, GNorm = 0.5270, lr_0 = 2.2571e-04
Loss = 4.5945e-02, PNorm = 68.4787, GNorm = 0.7508, lr_0 = 2.2555e-04
Loss = 4.7681e-02, PNorm = 68.4841, GNorm = 0.5480, lr_0 = 2.2540e-04
Loss = 4.4876e-02, PNorm = 68.4894, GNorm = 0.4607, lr_0 = 2.2524e-04
Loss = 4.3715e-02, PNorm = 68.4919, GNorm = 0.4006, lr_0 = 2.2509e-04
Loss = 3.9426e-02, PNorm = 68.4951, GNorm = 0.5518, lr_0 = 2.2493e-04
Loss = 5.0314e-02, PNorm = 68.5012, GNorm = 0.6801, lr_0 = 2.2478e-04
Loss = 5.3581e-02, PNorm = 68.5049, GNorm = 0.7283, lr_0 = 2.2463e-04
Loss = 4.5432e-02, PNorm = 68.5080, GNorm = 0.5699, lr_0 = 2.2447e-04
Loss = 4.1475e-02, PNorm = 68.5122, GNorm = 0.4380, lr_0 = 2.2432e-04
Loss = 4.8228e-02, PNorm = 68.5168, GNorm = 0.5521, lr_0 = 2.2416e-04
Loss = 4.2834e-02, PNorm = 68.5204, GNorm = 0.5517, lr_0 = 2.2401e-04
Loss = 4.8076e-02, PNorm = 68.5238, GNorm = 0.4492, lr_0 = 2.2386e-04
Loss = 5.0024e-02, PNorm = 68.5256, GNorm = 0.3805, lr_0 = 2.2370e-04
Loss = 4.7432e-02, PNorm = 68.5297, GNorm = 0.3871, lr_0 = 2.2355e-04
Loss = 4.3989e-02, PNorm = 68.5339, GNorm = 0.5813, lr_0 = 2.2340e-04
Loss = 4.8505e-02, PNorm = 68.5378, GNorm = 0.7766, lr_0 = 2.2324e-04
Loss = 4.9430e-02, PNorm = 68.5401, GNorm = 0.6481, lr_0 = 2.2309e-04
Loss = 4.3264e-02, PNorm = 68.5428, GNorm = 0.5869, lr_0 = 2.2294e-04
Loss = 4.4874e-02, PNorm = 68.5459, GNorm = 0.5610, lr_0 = 2.2279e-04
Loss = 4.6504e-02, PNorm = 68.5496, GNorm = 0.4642, lr_0 = 2.2263e-04
Loss = 4.5614e-02, PNorm = 68.5523, GNorm = 0.5048, lr_0 = 2.2248e-04
Loss = 4.7155e-02, PNorm = 68.5534, GNorm = 0.4204, lr_0 = 2.2233e-04
Loss = 4.5340e-02, PNorm = 68.5565, GNorm = 0.5512, lr_0 = 2.2218e-04
Loss = 5.2366e-02, PNorm = 68.5597, GNorm = 0.4906, lr_0 = 2.2202e-04
Loss = 4.4233e-02, PNorm = 68.5628, GNorm = 0.8382, lr_0 = 2.2187e-04
Loss = 5.0377e-02, PNorm = 68.5669, GNorm = 0.4959, lr_0 = 2.2172e-04
Loss = 5.2375e-02, PNorm = 68.5702, GNorm = 0.6773, lr_0 = 2.2157e-04
Loss = 3.9812e-02, PNorm = 68.5738, GNorm = 0.6493, lr_0 = 2.2142e-04
Loss = 5.1054e-02, PNorm = 68.5782, GNorm = 0.4115, lr_0 = 2.2126e-04
Loss = 5.2851e-02, PNorm = 68.5819, GNorm = 0.5885, lr_0 = 2.2111e-04
Loss = 4.2603e-02, PNorm = 68.5859, GNorm = 0.3779, lr_0 = 2.2096e-04
Loss = 5.0761e-02, PNorm = 68.5901, GNorm = 0.5079, lr_0 = 2.2081e-04
Loss = 4.5954e-02, PNorm = 68.5949, GNorm = 0.6134, lr_0 = 2.2066e-04
Loss = 4.5876e-02, PNorm = 68.5991, GNorm = 0.6173, lr_0 = 2.2051e-04
Loss = 4.2965e-02, PNorm = 68.6019, GNorm = 0.4550, lr_0 = 2.2036e-04
Loss = 4.0818e-02, PNorm = 68.6062, GNorm = 0.5476, lr_0 = 2.2021e-04
Loss = 4.0639e-02, PNorm = 68.6094, GNorm = 0.5144, lr_0 = 2.2005e-04
Loss = 4.1803e-02, PNorm = 68.6133, GNorm = 0.5962, lr_0 = 2.1990e-04
Loss = 4.7469e-02, PNorm = 68.6175, GNorm = 0.6018, lr_0 = 2.1975e-04
Loss = 4.2170e-02, PNorm = 68.6204, GNorm = 0.7386, lr_0 = 2.1960e-04
Loss = 4.5481e-02, PNorm = 68.6230, GNorm = 0.5450, lr_0 = 2.1945e-04
Loss = 4.3870e-02, PNorm = 68.6256, GNorm = 0.4477, lr_0 = 2.1930e-04
Loss = 4.9655e-02, PNorm = 68.6296, GNorm = 0.5451, lr_0 = 2.1915e-04
Loss = 4.2515e-02, PNorm = 68.6337, GNorm = 1.1051, lr_0 = 2.1900e-04
Loss = 4.6676e-02, PNorm = 68.6364, GNorm = 0.4490, lr_0 = 2.1885e-04
Loss = 4.3380e-02, PNorm = 68.6386, GNorm = 1.0541, lr_0 = 2.1870e-04
Loss = 4.0530e-02, PNorm = 68.6413, GNorm = 0.4609, lr_0 = 2.1855e-04
Loss = 4.8056e-02, PNorm = 68.6430, GNorm = 0.5457, lr_0 = 2.1840e-04
Loss = 5.2914e-02, PNorm = 68.6458, GNorm = 0.8512, lr_0 = 2.1825e-04
Loss = 4.6018e-02, PNorm = 68.6491, GNorm = 0.3856, lr_0 = 2.1810e-04
Loss = 4.4816e-02, PNorm = 68.6547, GNorm = 0.4303, lr_0 = 2.1795e-04
Loss = 4.6840e-02, PNorm = 68.6596, GNorm = 0.6791, lr_0 = 2.1780e-04
Loss = 4.7406e-02, PNorm = 68.6646, GNorm = 0.5566, lr_0 = 2.1765e-04
Loss = 5.2981e-02, PNorm = 68.6700, GNorm = 0.4241, lr_0 = 2.1751e-04
Loss = 4.6450e-02, PNorm = 68.6746, GNorm = 0.5646, lr_0 = 2.1736e-04
Loss = 4.5723e-02, PNorm = 68.6787, GNorm = 0.4851, lr_0 = 2.1721e-04
Loss = 5.3865e-02, PNorm = 68.6830, GNorm = 0.5086, lr_0 = 2.1706e-04
Loss = 4.6952e-02, PNorm = 68.6865, GNorm = 0.7419, lr_0 = 2.1691e-04
Loss = 4.9557e-02, PNorm = 68.6900, GNorm = 0.4737, lr_0 = 2.1676e-04
Loss = 4.7840e-02, PNorm = 68.6935, GNorm = 0.5082, lr_0 = 2.1661e-04
Loss = 6.0823e-02, PNorm = 68.6985, GNorm = 0.8676, lr_0 = 2.1646e-04
Loss = 5.3116e-02, PNorm = 68.7043, GNorm = 0.7678, lr_0 = 2.1632e-04
Loss = 4.5307e-02, PNorm = 68.7085, GNorm = 0.8521, lr_0 = 2.1617e-04
Loss = 5.6882e-02, PNorm = 68.7124, GNorm = 0.5560, lr_0 = 2.1602e-04
Loss = 4.7256e-02, PNorm = 68.7152, GNorm = 0.4995, lr_0 = 2.1587e-04
Loss = 5.4797e-02, PNorm = 68.7166, GNorm = 0.6355, lr_0 = 2.1572e-04
Loss = 4.6915e-02, PNorm = 68.7232, GNorm = 0.5016, lr_0 = 2.1558e-04
Loss = 4.4347e-02, PNorm = 68.7265, GNorm = 0.4124, lr_0 = 2.1543e-04
Loss = 5.4268e-02, PNorm = 68.7308, GNorm = 1.0625, lr_0 = 2.1528e-04
Loss = 4.2174e-02, PNorm = 68.7349, GNorm = 0.6360, lr_0 = 2.1513e-04
Loss = 4.8173e-02, PNorm = 68.7383, GNorm = 0.5608, lr_0 = 2.1499e-04
Loss = 5.0433e-02, PNorm = 68.7433, GNorm = 0.5446, lr_0 = 2.1484e-04
Loss = 4.5833e-02, PNorm = 68.7489, GNorm = 0.4942, lr_0 = 2.1469e-04
Loss = 4.8157e-02, PNorm = 68.7526, GNorm = 0.7703, lr_0 = 2.1454e-04
Loss = 4.3500e-02, PNorm = 68.7562, GNorm = 0.5284, lr_0 = 2.1440e-04
Loss = 5.2377e-02, PNorm = 68.7584, GNorm = 0.6074, lr_0 = 2.1425e-04
Loss = 4.8665e-02, PNorm = 68.7615, GNorm = 0.4885, lr_0 = 2.1410e-04
Loss = 4.8325e-02, PNorm = 68.7663, GNorm = 0.5524, lr_0 = 2.1396e-04
Loss = 5.1115e-02, PNorm = 68.7706, GNorm = 0.5330, lr_0 = 2.1381e-04
Loss = 5.2040e-02, PNorm = 68.7738, GNorm = 0.4930, lr_0 = 2.1366e-04
Loss = 5.2059e-02, PNorm = 68.7767, GNorm = 1.0703, lr_0 = 2.1352e-04
Loss = 4.8084e-02, PNorm = 68.7795, GNorm = 0.4795, lr_0 = 2.1337e-04
Loss = 4.3830e-02, PNorm = 68.7816, GNorm = 1.0139, lr_0 = 2.1323e-04
Loss = 4.6381e-02, PNorm = 68.7853, GNorm = 0.4358, lr_0 = 2.1308e-04
Loss = 4.8717e-02, PNorm = 68.7896, GNorm = 0.7855, lr_0 = 2.1293e-04
Loss = 4.1448e-02, PNorm = 68.7931, GNorm = 0.8835, lr_0 = 2.1279e-04
Loss = 4.5183e-02, PNorm = 68.7960, GNorm = 0.4421, lr_0 = 2.1264e-04
Loss = 4.4954e-02, PNorm = 68.7992, GNorm = 0.5095, lr_0 = 2.1250e-04
Loss = 4.8861e-02, PNorm = 68.8041, GNorm = 0.4173, lr_0 = 2.1235e-04
Loss = 5.5012e-02, PNorm = 68.8063, GNorm = 0.4842, lr_0 = 2.1221e-04
Loss = 4.7679e-02, PNorm = 68.8087, GNorm = 0.4206, lr_0 = 2.1206e-04
Loss = 4.6868e-02, PNorm = 68.8124, GNorm = 0.5364, lr_0 = 2.1191e-04
Loss = 4.6473e-02, PNorm = 68.8166, GNorm = 0.5523, lr_0 = 2.1177e-04
Loss = 4.9481e-02, PNorm = 68.8197, GNorm = 0.4506, lr_0 = 2.1162e-04
Loss = 4.7122e-02, PNorm = 68.8221, GNorm = 0.4247, lr_0 = 2.1148e-04
Loss = 5.7967e-02, PNorm = 68.8252, GNorm = 0.4881, lr_0 = 2.1133e-04
Loss = 4.6545e-02, PNorm = 68.8292, GNorm = 0.3952, lr_0 = 2.1119e-04
Loss = 4.9463e-02, PNorm = 68.8328, GNorm = 0.6106, lr_0 = 2.1104e-04
Loss = 5.6675e-02, PNorm = 68.8334, GNorm = 0.9020, lr_0 = 2.1090e-04
Loss = 5.1572e-02, PNorm = 68.8359, GNorm = 0.5488, lr_0 = 2.1076e-04
Loss = 4.2565e-02, PNorm = 68.8384, GNorm = 0.6626, lr_0 = 2.1061e-04
Loss = 5.8172e-02, PNorm = 68.8418, GNorm = 0.7625, lr_0 = 2.1047e-04
Loss = 4.5274e-02, PNorm = 68.8467, GNorm = 0.4376, lr_0 = 2.1032e-04
Loss = 3.9501e-02, PNorm = 68.8518, GNorm = 0.8460, lr_0 = 2.1018e-04
Loss = 4.8466e-02, PNorm = 68.8563, GNorm = 0.5464, lr_0 = 2.1003e-04
Loss = 4.2909e-02, PNorm = 68.8608, GNorm = 0.5087, lr_0 = 2.0989e-04
Loss = 4.2382e-02, PNorm = 68.8643, GNorm = 0.4853, lr_0 = 2.0975e-04
Loss = 4.5611e-02, PNorm = 68.8690, GNorm = 0.5657, lr_0 = 2.0960e-04
Validation mae = 0.386290
Epoch 21
Loss = 3.7865e-02, PNorm = 68.8730, GNorm = 0.5011, lr_0 = 2.0946e-04
Loss = 3.9796e-02, PNorm = 68.8762, GNorm = 0.5833, lr_0 = 2.0932e-04
Loss = 4.5073e-02, PNorm = 68.8805, GNorm = 0.5636, lr_0 = 2.0917e-04
Loss = 4.4134e-02, PNorm = 68.8850, GNorm = 0.4462, lr_0 = 2.0903e-04
Loss = 3.6570e-02, PNorm = 68.8883, GNorm = 0.4239, lr_0 = 2.0889e-04
Loss = 4.2108e-02, PNorm = 68.8916, GNorm = 0.4388, lr_0 = 2.0874e-04
Loss = 4.5279e-02, PNorm = 68.8952, GNorm = 0.7806, lr_0 = 2.0860e-04
Loss = 4.3111e-02, PNorm = 68.9000, GNorm = 0.6828, lr_0 = 2.0846e-04
Loss = 3.6569e-02, PNorm = 68.9030, GNorm = 0.4289, lr_0 = 2.0831e-04
Loss = 4.6810e-02, PNorm = 68.9059, GNorm = 0.5884, lr_0 = 2.0817e-04
Loss = 3.4711e-02, PNorm = 68.9102, GNorm = 0.4876, lr_0 = 2.0803e-04
Loss = 4.3126e-02, PNorm = 68.9119, GNorm = 0.4297, lr_0 = 2.0789e-04
Loss = 3.9963e-02, PNorm = 68.9144, GNorm = 0.4871, lr_0 = 2.0774e-04
Loss = 4.6525e-02, PNorm = 68.9178, GNorm = 0.5909, lr_0 = 2.0760e-04
Loss = 3.5446e-02, PNorm = 68.9213, GNorm = 0.5610, lr_0 = 2.0746e-04
Loss = 5.1066e-02, PNorm = 68.9264, GNorm = 0.6322, lr_0 = 2.0732e-04
Loss = 5.2344e-02, PNorm = 68.9300, GNorm = 0.6469, lr_0 = 2.0718e-04
Loss = 4.3885e-02, PNorm = 68.9304, GNorm = 0.4344, lr_0 = 2.0703e-04
Loss = 4.2570e-02, PNorm = 68.9318, GNorm = 0.6084, lr_0 = 2.0689e-04
Loss = 4.6028e-02, PNorm = 68.9349, GNorm = 0.4174, lr_0 = 2.0675e-04
Loss = 3.6566e-02, PNorm = 68.9358, GNorm = 0.5790, lr_0 = 2.0661e-04
Loss = 4.2558e-02, PNorm = 68.9387, GNorm = 0.4984, lr_0 = 2.0647e-04
Loss = 4.2702e-02, PNorm = 68.9429, GNorm = 0.4897, lr_0 = 2.0633e-04
Loss = 4.9253e-02, PNorm = 68.9456, GNorm = 0.6849, lr_0 = 2.0618e-04
Loss = 4.2829e-02, PNorm = 68.9482, GNorm = 0.7510, lr_0 = 2.0604e-04
Loss = 4.5973e-02, PNorm = 68.9504, GNorm = 0.4869, lr_0 = 2.0590e-04
Loss = 4.4547e-02, PNorm = 68.9523, GNorm = 0.5600, lr_0 = 2.0576e-04
Loss = 4.7240e-02, PNorm = 68.9551, GNorm = 0.6467, lr_0 = 2.0562e-04
Loss = 4.7051e-02, PNorm = 68.9586, GNorm = 0.8323, lr_0 = 2.0548e-04
Loss = 5.0441e-02, PNorm = 68.9591, GNorm = 0.6526, lr_0 = 2.0534e-04
Loss = 4.0936e-02, PNorm = 68.9632, GNorm = 0.5581, lr_0 = 2.0520e-04
Loss = 3.8961e-02, PNorm = 68.9672, GNorm = 0.4367, lr_0 = 2.0506e-04
Loss = 4.4895e-02, PNorm = 68.9686, GNorm = 0.5160, lr_0 = 2.0492e-04
Loss = 4.3779e-02, PNorm = 68.9729, GNorm = 0.4100, lr_0 = 2.0478e-04
Loss = 3.7605e-02, PNorm = 68.9773, GNorm = 0.5085, lr_0 = 2.0464e-04
Loss = 4.9018e-02, PNorm = 68.9816, GNorm = 0.5409, lr_0 = 2.0450e-04
Loss = 4.4309e-02, PNorm = 68.9860, GNorm = 0.3981, lr_0 = 2.0436e-04
Loss = 5.0086e-02, PNorm = 68.9887, GNorm = 0.5946, lr_0 = 2.0422e-04
Loss = 4.4561e-02, PNorm = 68.9918, GNorm = 0.5142, lr_0 = 2.0408e-04
Loss = 4.0056e-02, PNorm = 68.9956, GNorm = 0.5269, lr_0 = 2.0394e-04
Loss = 4.5765e-02, PNorm = 69.0000, GNorm = 0.6764, lr_0 = 2.0380e-04
Loss = 4.2276e-02, PNorm = 69.0039, GNorm = 0.6274, lr_0 = 2.0366e-04
Loss = 4.4075e-02, PNorm = 69.0086, GNorm = 0.6420, lr_0 = 2.0352e-04
Loss = 4.8304e-02, PNorm = 69.0118, GNorm = 0.5706, lr_0 = 2.0338e-04
Loss = 4.7164e-02, PNorm = 69.0158, GNorm = 0.4275, lr_0 = 2.0324e-04
Loss = 4.5358e-02, PNorm = 69.0188, GNorm = 0.7748, lr_0 = 2.0310e-04
Loss = 4.4379e-02, PNorm = 69.0222, GNorm = 0.5405, lr_0 = 2.0296e-04
Loss = 4.1631e-02, PNorm = 69.0246, GNorm = 0.4770, lr_0 = 2.0282e-04
Loss = 4.2028e-02, PNorm = 69.0274, GNorm = 0.6056, lr_0 = 2.0268e-04
Loss = 3.8288e-02, PNorm = 69.0313, GNorm = 0.5693, lr_0 = 2.0254e-04
Loss = 4.0270e-02, PNorm = 69.0350, GNorm = 0.5406, lr_0 = 2.0240e-04
Loss = 3.7379e-02, PNorm = 69.0383, GNorm = 0.3756, lr_0 = 2.0227e-04
Loss = 4.6219e-02, PNorm = 69.0418, GNorm = 0.7764, lr_0 = 2.0213e-04
Loss = 5.2829e-02, PNorm = 69.0462, GNorm = 0.4606, lr_0 = 2.0199e-04
Loss = 4.7686e-02, PNorm = 69.0499, GNorm = 0.5088, lr_0 = 2.0185e-04
Loss = 4.8196e-02, PNorm = 69.0549, GNorm = 0.6705, lr_0 = 2.0171e-04
Loss = 4.1489e-02, PNorm = 69.0592, GNorm = 0.6798, lr_0 = 2.0157e-04
Loss = 4.0762e-02, PNorm = 69.0620, GNorm = 0.5618, lr_0 = 2.0144e-04
Loss = 4.5511e-02, PNorm = 69.0646, GNorm = 0.5979, lr_0 = 2.0130e-04
Loss = 6.1226e-02, PNorm = 69.0695, GNorm = 0.5583, lr_0 = 2.0116e-04
Loss = 4.8627e-02, PNorm = 69.0749, GNorm = 0.5541, lr_0 = 2.0102e-04
Loss = 4.5386e-02, PNorm = 69.0789, GNorm = 0.4619, lr_0 = 2.0088e-04
Loss = 4.1233e-02, PNorm = 69.0821, GNorm = 0.3273, lr_0 = 2.0075e-04
Loss = 3.8632e-02, PNorm = 69.0846, GNorm = 0.4917, lr_0 = 2.0061e-04
Loss = 4.1414e-02, PNorm = 69.0872, GNorm = 0.3712, lr_0 = 2.0047e-04
Loss = 4.6390e-02, PNorm = 69.0897, GNorm = 0.6165, lr_0 = 2.0033e-04
Loss = 4.5858e-02, PNorm = 69.0937, GNorm = 0.5991, lr_0 = 2.0020e-04
Loss = 4.8948e-02, PNorm = 69.0955, GNorm = 0.5883, lr_0 = 2.0006e-04
Loss = 4.4078e-02, PNorm = 69.0989, GNorm = 0.5248, lr_0 = 1.9992e-04
Loss = 4.4567e-02, PNorm = 69.1027, GNorm = 0.5820, lr_0 = 1.9979e-04
Loss = 5.2075e-02, PNorm = 69.1060, GNorm = 0.6347, lr_0 = 1.9965e-04
Loss = 4.0233e-02, PNorm = 69.1090, GNorm = 0.4364, lr_0 = 1.9951e-04
Loss = 4.3104e-02, PNorm = 69.1117, GNorm = 0.4302, lr_0 = 1.9938e-04
Loss = 4.1964e-02, PNorm = 69.1138, GNorm = 0.6853, lr_0 = 1.9924e-04
Loss = 4.8616e-02, PNorm = 69.1161, GNorm = 0.5254, lr_0 = 1.9910e-04
Loss = 5.0120e-02, PNorm = 69.1169, GNorm = 0.4853, lr_0 = 1.9897e-04
Loss = 4.8131e-02, PNorm = 69.1185, GNorm = 0.6132, lr_0 = 1.9883e-04
Loss = 4.7133e-02, PNorm = 69.1236, GNorm = 0.6446, lr_0 = 1.9869e-04
Loss = 5.0133e-02, PNorm = 69.1277, GNorm = 0.6069, lr_0 = 1.9856e-04
Loss = 4.7610e-02, PNorm = 69.1315, GNorm = 0.5991, lr_0 = 1.9842e-04
Loss = 4.1545e-02, PNorm = 69.1357, GNorm = 0.7080, lr_0 = 1.9829e-04
Loss = 4.8772e-02, PNorm = 69.1388, GNorm = 0.4607, lr_0 = 1.9815e-04
Loss = 4.2877e-02, PNorm = 69.1412, GNorm = 0.8758, lr_0 = 1.9801e-04
Loss = 4.2543e-02, PNorm = 69.1452, GNorm = 0.5493, lr_0 = 1.9788e-04
Loss = 4.8263e-02, PNorm = 69.1496, GNorm = 0.3941, lr_0 = 1.9774e-04
Loss = 4.5039e-02, PNorm = 69.1518, GNorm = 0.7441, lr_0 = 1.9761e-04
Loss = 4.7553e-02, PNorm = 69.1541, GNorm = 0.5138, lr_0 = 1.9747e-04
Loss = 4.1042e-02, PNorm = 69.1556, GNorm = 0.4882, lr_0 = 1.9734e-04
Loss = 3.8858e-02, PNorm = 69.1575, GNorm = 0.4338, lr_0 = 1.9720e-04
Loss = 5.4064e-02, PNorm = 69.1597, GNorm = 0.5623, lr_0 = 1.9707e-04
Loss = 4.2331e-02, PNorm = 69.1620, GNorm = 0.5368, lr_0 = 1.9693e-04
Loss = 5.0782e-02, PNorm = 69.1660, GNorm = 0.4616, lr_0 = 1.9680e-04
Loss = 4.3876e-02, PNorm = 69.1690, GNorm = 0.8220, lr_0 = 1.9666e-04
Loss = 4.4711e-02, PNorm = 69.1718, GNorm = 0.5223, lr_0 = 1.9653e-04
Loss = 4.7793e-02, PNorm = 69.1748, GNorm = 0.5386, lr_0 = 1.9639e-04
Loss = 4.3662e-02, PNorm = 69.1789, GNorm = 0.4962, lr_0 = 1.9626e-04
Loss = 3.9927e-02, PNorm = 69.1811, GNorm = 0.4240, lr_0 = 1.9612e-04
Loss = 3.8503e-02, PNorm = 69.1836, GNorm = 0.6258, lr_0 = 1.9599e-04
Loss = 4.8825e-02, PNorm = 69.1865, GNorm = 0.4870, lr_0 = 1.9585e-04
Loss = 4.3601e-02, PNorm = 69.1900, GNorm = 0.5870, lr_0 = 1.9572e-04
Loss = 3.6170e-02, PNorm = 69.1934, GNorm = 0.3627, lr_0 = 1.9559e-04
Loss = 4.8469e-02, PNorm = 69.1962, GNorm = 0.5430, lr_0 = 1.9545e-04
Loss = 4.2549e-02, PNorm = 69.1983, GNorm = 0.4627, lr_0 = 1.9532e-04
Loss = 5.1076e-02, PNorm = 69.2019, GNorm = 0.5430, lr_0 = 1.9518e-04
Loss = 5.0205e-02, PNorm = 69.2059, GNorm = 0.6421, lr_0 = 1.9505e-04
Loss = 4.9057e-02, PNorm = 69.2118, GNorm = 0.4752, lr_0 = 1.9492e-04
Loss = 4.2530e-02, PNorm = 69.2179, GNorm = 0.4244, lr_0 = 1.9478e-04
Loss = 4.8664e-02, PNorm = 69.2236, GNorm = 0.5706, lr_0 = 1.9465e-04
Loss = 4.6684e-02, PNorm = 69.2281, GNorm = 0.5336, lr_0 = 1.9452e-04
Loss = 4.6492e-02, PNorm = 69.2311, GNorm = 0.7336, lr_0 = 1.9438e-04
Loss = 3.9340e-02, PNorm = 69.2331, GNorm = 0.4716, lr_0 = 1.9425e-04
Loss = 4.2699e-02, PNorm = 69.2350, GNorm = 0.4659, lr_0 = 1.9412e-04
Loss = 4.8256e-02, PNorm = 69.2361, GNorm = 0.6814, lr_0 = 1.9398e-04
Loss = 4.3588e-02, PNorm = 69.2378, GNorm = 0.4808, lr_0 = 1.9385e-04
Loss = 5.0181e-02, PNorm = 69.2406, GNorm = 0.7715, lr_0 = 1.9372e-04
Loss = 4.8014e-02, PNorm = 69.2430, GNorm = 0.5723, lr_0 = 1.9359e-04
Loss = 4.5248e-02, PNorm = 69.2435, GNorm = 0.5593, lr_0 = 1.9345e-04
Loss = 4.3696e-02, PNorm = 69.2464, GNorm = 0.4437, lr_0 = 1.9332e-04
Loss = 4.3119e-02, PNorm = 69.2486, GNorm = 0.4711, lr_0 = 1.9319e-04
Loss = 4.2277e-02, PNorm = 69.2524, GNorm = 0.4243, lr_0 = 1.9306e-04
Validation mae = 0.386795
Epoch 22
Loss = 4.6431e-02, PNorm = 69.2556, GNorm = 0.6095, lr_0 = 1.9292e-04
Loss = 3.4027e-02, PNorm = 69.2598, GNorm = 0.6127, lr_0 = 1.9279e-04
Loss = 4.3298e-02, PNorm = 69.2629, GNorm = 0.4627, lr_0 = 1.9266e-04
Loss = 3.3642e-02, PNorm = 69.2669, GNorm = 0.4079, lr_0 = 1.9253e-04
Loss = 3.7190e-02, PNorm = 69.2705, GNorm = 0.5587, lr_0 = 1.9240e-04
Loss = 3.9633e-02, PNorm = 69.2739, GNorm = 0.3943, lr_0 = 1.9226e-04
Loss = 3.6819e-02, PNorm = 69.2785, GNorm = 0.3933, lr_0 = 1.9213e-04
Loss = 4.0136e-02, PNorm = 69.2830, GNorm = 0.4373, lr_0 = 1.9200e-04
Loss = 3.7566e-02, PNorm = 69.2846, GNorm = 0.4903, lr_0 = 1.9187e-04
Loss = 4.0345e-02, PNorm = 69.2860, GNorm = 0.4988, lr_0 = 1.9174e-04
Loss = 4.3981e-02, PNorm = 69.2896, GNorm = 0.3921, lr_0 = 1.9161e-04
Loss = 3.7020e-02, PNorm = 69.2932, GNorm = 0.5942, lr_0 = 1.9148e-04
Loss = 3.6372e-02, PNorm = 69.2946, GNorm = 0.5099, lr_0 = 1.9134e-04
Loss = 3.7748e-02, PNorm = 69.2962, GNorm = 0.4213, lr_0 = 1.9121e-04
Loss = 4.0409e-02, PNorm = 69.2993, GNorm = 0.3823, lr_0 = 1.9108e-04
Loss = 4.1134e-02, PNorm = 69.3031, GNorm = 0.4427, lr_0 = 1.9095e-04
Loss = 4.1391e-02, PNorm = 69.3046, GNorm = 0.6225, lr_0 = 1.9082e-04
Loss = 4.3536e-02, PNorm = 69.3061, GNorm = 0.6361, lr_0 = 1.9069e-04
Loss = 3.8891e-02, PNorm = 69.3095, GNorm = 0.4569, lr_0 = 1.9056e-04
Loss = 3.6947e-02, PNorm = 69.3125, GNorm = 0.4377, lr_0 = 1.9043e-04
Loss = 3.8251e-02, PNorm = 69.3155, GNorm = 0.4846, lr_0 = 1.9030e-04
Loss = 4.3463e-02, PNorm = 69.3194, GNorm = 0.7883, lr_0 = 1.9017e-04
Loss = 3.8191e-02, PNorm = 69.3237, GNorm = 0.7299, lr_0 = 1.9004e-04
Loss = 4.0862e-02, PNorm = 69.3268, GNorm = 0.4256, lr_0 = 1.8991e-04
Loss = 4.2057e-02, PNorm = 69.3292, GNorm = 0.3697, lr_0 = 1.8978e-04
Loss = 4.0008e-02, PNorm = 69.3320, GNorm = 0.4411, lr_0 = 1.8965e-04
Loss = 3.9511e-02, PNorm = 69.3348, GNorm = 0.7767, lr_0 = 1.8952e-04
Loss = 3.8819e-02, PNorm = 69.3361, GNorm = 0.4823, lr_0 = 1.8939e-04
Loss = 4.0728e-02, PNorm = 69.3392, GNorm = 0.5087, lr_0 = 1.8926e-04
Loss = 4.1918e-02, PNorm = 69.3422, GNorm = 0.4182, lr_0 = 1.8913e-04
Loss = 4.7710e-02, PNorm = 69.3446, GNorm = 0.4781, lr_0 = 1.8900e-04
Loss = 3.9730e-02, PNorm = 69.3476, GNorm = 0.5205, lr_0 = 1.8887e-04
Loss = 4.4963e-02, PNorm = 69.3509, GNorm = 0.8324, lr_0 = 1.8874e-04
Loss = 3.8559e-02, PNorm = 69.3528, GNorm = 0.4039, lr_0 = 1.8861e-04
Loss = 4.0581e-02, PNorm = 69.3573, GNorm = 0.6661, lr_0 = 1.8848e-04
Loss = 3.5506e-02, PNorm = 69.3599, GNorm = 0.3771, lr_0 = 1.8835e-04
Loss = 4.5071e-02, PNorm = 69.3614, GNorm = 0.5026, lr_0 = 1.8822e-04
Loss = 4.0023e-02, PNorm = 69.3628, GNorm = 0.3953, lr_0 = 1.8809e-04
Loss = 3.9321e-02, PNorm = 69.3645, GNorm = 0.4433, lr_0 = 1.8797e-04
Loss = 3.7599e-02, PNorm = 69.3671, GNorm = 0.5820, lr_0 = 1.8784e-04
Loss = 4.4656e-02, PNorm = 69.3699, GNorm = 0.6981, lr_0 = 1.8771e-04
Loss = 4.2254e-02, PNorm = 69.3731, GNorm = 0.6156, lr_0 = 1.8758e-04
Loss = 3.9265e-02, PNorm = 69.3782, GNorm = 0.4135, lr_0 = 1.8745e-04
Loss = 4.1860e-02, PNorm = 69.3814, GNorm = 0.9908, lr_0 = 1.8732e-04
Loss = 3.8253e-02, PNorm = 69.3853, GNorm = 0.6010, lr_0 = 1.8719e-04
Loss = 4.8415e-02, PNorm = 69.3874, GNorm = 0.7164, lr_0 = 1.8707e-04
Loss = 4.3539e-02, PNorm = 69.3904, GNorm = 0.4925, lr_0 = 1.8694e-04
Loss = 4.3477e-02, PNorm = 69.3924, GNorm = 0.6208, lr_0 = 1.8681e-04
Loss = 5.0429e-02, PNorm = 69.3955, GNorm = 0.6175, lr_0 = 1.8668e-04
Loss = 4.4356e-02, PNorm = 69.3985, GNorm = 0.6837, lr_0 = 1.8655e-04
Loss = 3.6531e-02, PNorm = 69.3999, GNorm = 0.4591, lr_0 = 1.8643e-04
Loss = 4.2884e-02, PNorm = 69.4032, GNorm = 0.4645, lr_0 = 1.8630e-04
Loss = 4.8396e-02, PNorm = 69.4067, GNorm = 0.6032, lr_0 = 1.8617e-04
Loss = 4.6795e-02, PNorm = 69.4114, GNorm = 0.7063, lr_0 = 1.8604e-04
Loss = 4.1974e-02, PNorm = 69.4150, GNorm = 0.5405, lr_0 = 1.8592e-04
Loss = 4.2633e-02, PNorm = 69.4183, GNorm = 0.8003, lr_0 = 1.8579e-04
Loss = 4.2514e-02, PNorm = 69.4218, GNorm = 0.6287, lr_0 = 1.8566e-04
Loss = 4.3020e-02, PNorm = 69.4249, GNorm = 0.5177, lr_0 = 1.8553e-04
Loss = 3.9199e-02, PNorm = 69.4287, GNorm = 0.5347, lr_0 = 1.8541e-04
Loss = 3.7559e-02, PNorm = 69.4322, GNorm = 0.4421, lr_0 = 1.8528e-04
Loss = 3.7100e-02, PNorm = 69.4349, GNorm = 0.4006, lr_0 = 1.8515e-04
Loss = 3.4421e-02, PNorm = 69.4369, GNorm = 0.6222, lr_0 = 1.8503e-04
Loss = 4.4872e-02, PNorm = 69.4404, GNorm = 0.6517, lr_0 = 1.8490e-04
Loss = 4.5458e-02, PNorm = 69.4432, GNorm = 0.8682, lr_0 = 1.8477e-04
Loss = 5.0600e-02, PNorm = 69.4461, GNorm = 0.3847, lr_0 = 1.8465e-04
Loss = 4.9765e-02, PNorm = 69.4491, GNorm = 0.5547, lr_0 = 1.8452e-04
Loss = 4.4231e-02, PNorm = 69.4532, GNorm = 0.4233, lr_0 = 1.8439e-04
Loss = 4.0817e-02, PNorm = 69.4569, GNorm = 0.4867, lr_0 = 1.8427e-04
Loss = 3.7862e-02, PNorm = 69.4597, GNorm = 0.5854, lr_0 = 1.8414e-04
Loss = 4.2512e-02, PNorm = 69.4620, GNorm = 0.5682, lr_0 = 1.8401e-04
Loss = 4.5346e-02, PNorm = 69.4647, GNorm = 0.6490, lr_0 = 1.8389e-04
Loss = 4.3311e-02, PNorm = 69.4666, GNorm = 0.5016, lr_0 = 1.8376e-04
Loss = 4.0854e-02, PNorm = 69.4697, GNorm = 0.3796, lr_0 = 1.8364e-04
Loss = 4.8209e-02, PNorm = 69.4729, GNorm = 0.4652, lr_0 = 1.8351e-04
Loss = 4.3086e-02, PNorm = 69.4769, GNorm = 0.5753, lr_0 = 1.8338e-04
Loss = 3.6593e-02, PNorm = 69.4801, GNorm = 0.5704, lr_0 = 1.8326e-04
Loss = 4.1572e-02, PNorm = 69.4825, GNorm = 0.5688, lr_0 = 1.8313e-04
Loss = 4.4701e-02, PNorm = 69.4845, GNorm = 0.5341, lr_0 = 1.8301e-04
Loss = 4.0336e-02, PNorm = 69.4844, GNorm = 0.5399, lr_0 = 1.8288e-04
Loss = 4.1355e-02, PNorm = 69.4856, GNorm = 0.3925, lr_0 = 1.8276e-04
Loss = 4.1379e-02, PNorm = 69.4887, GNorm = 0.5734, lr_0 = 1.8263e-04
Loss = 3.8707e-02, PNorm = 69.4932, GNorm = 0.6602, lr_0 = 1.8251e-04
Loss = 4.5589e-02, PNorm = 69.4978, GNorm = 0.5840, lr_0 = 1.8238e-04
Loss = 3.9857e-02, PNorm = 69.5016, GNorm = 0.4306, lr_0 = 1.8226e-04
Loss = 4.0701e-02, PNorm = 69.5033, GNorm = 0.7867, lr_0 = 1.8213e-04
Loss = 3.9654e-02, PNorm = 69.5046, GNorm = 0.5398, lr_0 = 1.8201e-04
Loss = 4.7446e-02, PNorm = 69.5067, GNorm = 0.6433, lr_0 = 1.8188e-04
Loss = 4.6109e-02, PNorm = 69.5092, GNorm = 0.5166, lr_0 = 1.8176e-04
Loss = 4.4976e-02, PNorm = 69.5120, GNorm = 0.7267, lr_0 = 1.8163e-04
Loss = 4.3734e-02, PNorm = 69.5135, GNorm = 0.3985, lr_0 = 1.8151e-04
Loss = 4.4102e-02, PNorm = 69.5170, GNorm = 0.5085, lr_0 = 1.8138e-04
Loss = 4.7375e-02, PNorm = 69.5213, GNorm = 0.5442, lr_0 = 1.8126e-04
Loss = 4.6760e-02, PNorm = 69.5241, GNorm = 0.6197, lr_0 = 1.8114e-04
Loss = 4.3678e-02, PNorm = 69.5266, GNorm = 0.6486, lr_0 = 1.8101e-04
Loss = 4.4926e-02, PNorm = 69.5284, GNorm = 0.5527, lr_0 = 1.8089e-04
Loss = 4.2868e-02, PNorm = 69.5296, GNorm = 0.4444, lr_0 = 1.8076e-04
Loss = 4.5768e-02, PNorm = 69.5324, GNorm = 0.5584, lr_0 = 1.8064e-04
Loss = 3.8806e-02, PNorm = 69.5346, GNorm = 0.5813, lr_0 = 1.8052e-04
Loss = 6.1617e-02, PNorm = 69.5375, GNorm = 1.4799, lr_0 = 1.8039e-04
Loss = 3.9637e-02, PNorm = 69.5409, GNorm = 0.5726, lr_0 = 1.8027e-04
Loss = 4.5178e-02, PNorm = 69.5441, GNorm = 0.5747, lr_0 = 1.8015e-04
Loss = 4.4011e-02, PNorm = 69.5462, GNorm = 0.4413, lr_0 = 1.8002e-04
Loss = 4.4801e-02, PNorm = 69.5502, GNorm = 0.7545, lr_0 = 1.7990e-04
Loss = 4.4071e-02, PNorm = 69.5533, GNorm = 0.5481, lr_0 = 1.7978e-04
Loss = 4.9780e-02, PNorm = 69.5562, GNorm = 0.7084, lr_0 = 1.7965e-04
Loss = 3.8060e-02, PNorm = 69.5593, GNorm = 0.4083, lr_0 = 1.7953e-04
Loss = 4.2597e-02, PNorm = 69.5621, GNorm = 0.5701, lr_0 = 1.7941e-04
Loss = 4.4869e-02, PNorm = 69.5670, GNorm = 0.5082, lr_0 = 1.7928e-04
Loss = 3.9403e-02, PNorm = 69.5708, GNorm = 0.5737, lr_0 = 1.7916e-04
Loss = 5.0674e-02, PNorm = 69.5744, GNorm = 0.5979, lr_0 = 1.7904e-04
Loss = 4.5336e-02, PNorm = 69.5783, GNorm = 0.6460, lr_0 = 1.7892e-04
Loss = 4.1410e-02, PNorm = 69.5791, GNorm = 0.4267, lr_0 = 1.7879e-04
Loss = 4.4149e-02, PNorm = 69.5811, GNorm = 0.4492, lr_0 = 1.7867e-04
Loss = 5.2371e-02, PNorm = 69.5834, GNorm = 0.7123, lr_0 = 1.7855e-04
Loss = 4.1523e-02, PNorm = 69.5863, GNorm = 0.5087, lr_0 = 1.7843e-04
Loss = 4.4135e-02, PNorm = 69.5896, GNorm = 0.6069, lr_0 = 1.7830e-04
Loss = 3.9179e-02, PNorm = 69.5940, GNorm = 0.6858, lr_0 = 1.7818e-04
Loss = 3.9919e-02, PNorm = 69.5971, GNorm = 0.3647, lr_0 = 1.7806e-04
Loss = 5.7669e-02, PNorm = 69.5996, GNorm = 0.4910, lr_0 = 1.7794e-04
Loss = 3.8225e-02, PNorm = 69.6022, GNorm = 0.5220, lr_0 = 1.7782e-04
Validation mae = 0.386924
Epoch 23
Loss = 3.7303e-02, PNorm = 69.6044, GNorm = 0.4610, lr_0 = 1.7769e-04
Loss = 3.8218e-02, PNorm = 69.6078, GNorm = 0.5099, lr_0 = 1.7757e-04
Loss = 3.5461e-02, PNorm = 69.6103, GNorm = 0.4508, lr_0 = 1.7745e-04
Loss = 3.2171e-02, PNorm = 69.6127, GNorm = 0.4701, lr_0 = 1.7733e-04
Loss = 3.3952e-02, PNorm = 69.6157, GNorm = 0.5250, lr_0 = 1.7721e-04
Loss = 3.3897e-02, PNorm = 69.6197, GNorm = 0.4769, lr_0 = 1.7709e-04
Loss = 4.0822e-02, PNorm = 69.6233, GNorm = 0.4157, lr_0 = 1.7696e-04
Loss = 3.2519e-02, PNorm = 69.6249, GNorm = 0.3977, lr_0 = 1.7684e-04
Loss = 3.4879e-02, PNorm = 69.6274, GNorm = 0.5112, lr_0 = 1.7672e-04
Loss = 3.3719e-02, PNorm = 69.6306, GNorm = 0.4734, lr_0 = 1.7660e-04
Loss = 3.7131e-02, PNorm = 69.6336, GNorm = 0.4067, lr_0 = 1.7648e-04
Loss = 4.0557e-02, PNorm = 69.6362, GNorm = 0.6159, lr_0 = 1.7636e-04
Loss = 4.2279e-02, PNorm = 69.6378, GNorm = 0.4988, lr_0 = 1.7624e-04
Loss = 3.8375e-02, PNorm = 69.6391, GNorm = 0.5164, lr_0 = 1.7612e-04
Loss = 3.8184e-02, PNorm = 69.6412, GNorm = 0.3868, lr_0 = 1.7600e-04
Loss = 4.0134e-02, PNorm = 69.6437, GNorm = 0.4078, lr_0 = 1.7588e-04
Loss = 4.4758e-02, PNorm = 69.6467, GNorm = 0.4295, lr_0 = 1.7576e-04
Loss = 4.6482e-02, PNorm = 69.6500, GNorm = 0.5321, lr_0 = 1.7564e-04
Loss = 3.5518e-02, PNorm = 69.6535, GNorm = 0.4459, lr_0 = 1.7552e-04
Loss = 4.1922e-02, PNorm = 69.6569, GNorm = 0.5433, lr_0 = 1.7540e-04
Loss = 3.8640e-02, PNorm = 69.6608, GNorm = 0.4753, lr_0 = 1.7528e-04
Loss = 3.4837e-02, PNorm = 69.6632, GNorm = 0.4333, lr_0 = 1.7516e-04
Loss = 3.8444e-02, PNorm = 69.6648, GNorm = 0.3889, lr_0 = 1.7504e-04
Loss = 4.3295e-02, PNorm = 69.6670, GNorm = 0.5526, lr_0 = 1.7492e-04
Loss = 3.7574e-02, PNorm = 69.6684, GNorm = 0.4348, lr_0 = 1.7480e-04
Loss = 4.2800e-02, PNorm = 69.6713, GNorm = 0.5933, lr_0 = 1.7468e-04
Loss = 4.1722e-02, PNorm = 69.6740, GNorm = 0.4692, lr_0 = 1.7456e-04
Loss = 3.5125e-02, PNorm = 69.6766, GNorm = 0.3584, lr_0 = 1.7444e-04
Loss = 3.7100e-02, PNorm = 69.6786, GNorm = 0.6045, lr_0 = 1.7432e-04
Loss = 3.6593e-02, PNorm = 69.6810, GNorm = 0.3866, lr_0 = 1.7420e-04
Loss = 4.0760e-02, PNorm = 69.6830, GNorm = 0.4603, lr_0 = 1.7408e-04
Loss = 4.0913e-02, PNorm = 69.6853, GNorm = 0.4981, lr_0 = 1.7396e-04
Loss = 3.4651e-02, PNorm = 69.6875, GNorm = 0.4739, lr_0 = 1.7384e-04
Loss = 3.8058e-02, PNorm = 69.6895, GNorm = 0.3737, lr_0 = 1.7372e-04
Loss = 3.9773e-02, PNorm = 69.6918, GNorm = 0.5397, lr_0 = 1.7360e-04
Loss = 3.6146e-02, PNorm = 69.6959, GNorm = 0.4112, lr_0 = 1.7348e-04
Loss = 4.0712e-02, PNorm = 69.6994, GNorm = 0.6643, lr_0 = 1.7336e-04
Loss = 4.1856e-02, PNorm = 69.7014, GNorm = 0.4039, lr_0 = 1.7325e-04
Loss = 3.1405e-02, PNorm = 69.7019, GNorm = 0.5969, lr_0 = 1.7313e-04
Loss = 3.4609e-02, PNorm = 69.7031, GNorm = 0.6984, lr_0 = 1.7301e-04
Loss = 3.6543e-02, PNorm = 69.7049, GNorm = 0.5061, lr_0 = 1.7289e-04
Loss = 3.9882e-02, PNorm = 69.7077, GNorm = 0.5453, lr_0 = 1.7277e-04
Loss = 3.7089e-02, PNorm = 69.7105, GNorm = 0.4207, lr_0 = 1.7265e-04
Loss = 3.4022e-02, PNorm = 69.7119, GNorm = 0.5702, lr_0 = 1.7253e-04
Loss = 3.5501e-02, PNorm = 69.7132, GNorm = 0.4599, lr_0 = 1.7242e-04
Loss = 4.8098e-02, PNorm = 69.7159, GNorm = 0.6679, lr_0 = 1.7230e-04
Loss = 3.7086e-02, PNorm = 69.7188, GNorm = 0.3953, lr_0 = 1.7218e-04
Loss = 4.3045e-02, PNorm = 69.7213, GNorm = 0.4923, lr_0 = 1.7206e-04
Loss = 4.3870e-02, PNorm = 69.7221, GNorm = 0.7008, lr_0 = 1.7194e-04
Loss = 4.2784e-02, PNorm = 69.7245, GNorm = 0.4486, lr_0 = 1.7183e-04
Loss = 4.0046e-02, PNorm = 69.7270, GNorm = 0.4086, lr_0 = 1.7171e-04
Loss = 3.2847e-02, PNorm = 69.7298, GNorm = 0.4922, lr_0 = 1.7159e-04
Loss = 4.2506e-02, PNorm = 69.7327, GNorm = 0.6429, lr_0 = 1.7147e-04
Loss = 3.9404e-02, PNorm = 69.7359, GNorm = 0.3919, lr_0 = 1.7136e-04
Loss = 4.0802e-02, PNorm = 69.7397, GNorm = 0.7207, lr_0 = 1.7124e-04
Loss = 3.7730e-02, PNorm = 69.7438, GNorm = 0.4492, lr_0 = 1.7112e-04
Loss = 3.4428e-02, PNorm = 69.7473, GNorm = 0.3790, lr_0 = 1.7100e-04
Loss = 3.6819e-02, PNorm = 69.7497, GNorm = 0.6796, lr_0 = 1.7089e-04
Loss = 4.3275e-02, PNorm = 69.7527, GNorm = 0.7198, lr_0 = 1.7077e-04
Loss = 4.2809e-02, PNorm = 69.7569, GNorm = 0.3728, lr_0 = 1.7065e-04
Loss = 5.1449e-02, PNorm = 69.7592, GNorm = 0.4146, lr_0 = 1.7054e-04
Loss = 4.2831e-02, PNorm = 69.7621, GNorm = 0.8964, lr_0 = 1.7042e-04
Loss = 4.4967e-02, PNorm = 69.7639, GNorm = 0.5170, lr_0 = 1.7030e-04
Loss = 4.6322e-02, PNorm = 69.7656, GNorm = 0.7784, lr_0 = 1.7019e-04
Loss = 4.1205e-02, PNorm = 69.7700, GNorm = 0.3913, lr_0 = 1.7007e-04
Loss = 3.3525e-02, PNorm = 69.7738, GNorm = 0.4359, lr_0 = 1.6995e-04
Loss = 3.3943e-02, PNorm = 69.7764, GNorm = 0.4129, lr_0 = 1.6984e-04
Loss = 3.8910e-02, PNorm = 69.7777, GNorm = 0.4419, lr_0 = 1.6972e-04
Loss = 3.8379e-02, PNorm = 69.7794, GNorm = 0.7400, lr_0 = 1.6960e-04
Loss = 4.1218e-02, PNorm = 69.7813, GNorm = 0.5457, lr_0 = 1.6949e-04
Loss = 3.7066e-02, PNorm = 69.7831, GNorm = 0.6381, lr_0 = 1.6937e-04
Loss = 3.6752e-02, PNorm = 69.7842, GNorm = 0.6529, lr_0 = 1.6926e-04
Loss = 4.0917e-02, PNorm = 69.7869, GNorm = 0.4448, lr_0 = 1.6914e-04
Loss = 4.0188e-02, PNorm = 69.7908, GNorm = 0.5898, lr_0 = 1.6902e-04
Loss = 3.3709e-02, PNorm = 69.7926, GNorm = 0.4237, lr_0 = 1.6891e-04
Loss = 3.2955e-02, PNorm = 69.7944, GNorm = 0.6220, lr_0 = 1.6879e-04
Loss = 4.0951e-02, PNorm = 69.7967, GNorm = 0.5756, lr_0 = 1.6868e-04
Loss = 4.4193e-02, PNorm = 69.7990, GNorm = 0.4418, lr_0 = 1.6856e-04
Loss = 4.2476e-02, PNorm = 69.8019, GNorm = 0.4434, lr_0 = 1.6845e-04
Loss = 4.0885e-02, PNorm = 69.8033, GNorm = 0.5953, lr_0 = 1.6833e-04
Loss = 3.8875e-02, PNorm = 69.8056, GNorm = 0.4129, lr_0 = 1.6821e-04
Loss = 4.5131e-02, PNorm = 69.8085, GNorm = 0.5934, lr_0 = 1.6810e-04
Loss = 4.2654e-02, PNorm = 69.8128, GNorm = 0.6551, lr_0 = 1.6798e-04
Loss = 3.3837e-02, PNorm = 69.8171, GNorm = 0.4489, lr_0 = 1.6787e-04
Loss = 4.7198e-02, PNorm = 69.8186, GNorm = 0.4407, lr_0 = 1.6775e-04
Loss = 3.9820e-02, PNorm = 69.8209, GNorm = 0.4920, lr_0 = 1.6764e-04
Loss = 3.5503e-02, PNorm = 69.8233, GNorm = 0.4389, lr_0 = 1.6752e-04
Loss = 4.7135e-02, PNorm = 69.8245, GNorm = 1.1893, lr_0 = 1.6741e-04
Loss = 4.2955e-02, PNorm = 69.8263, GNorm = 0.5302, lr_0 = 1.6729e-04
Loss = 4.1735e-02, PNorm = 69.8290, GNorm = 0.5536, lr_0 = 1.6718e-04
Loss = 3.4145e-02, PNorm = 69.8323, GNorm = 0.4593, lr_0 = 1.6707e-04
Loss = 4.2523e-02, PNorm = 69.8351, GNorm = 0.6129, lr_0 = 1.6695e-04
Loss = 4.0715e-02, PNorm = 69.8371, GNorm = 0.4727, lr_0 = 1.6684e-04
Loss = 3.7570e-02, PNorm = 69.8400, GNorm = 0.5676, lr_0 = 1.6672e-04
Loss = 4.2970e-02, PNorm = 69.8428, GNorm = 0.4879, lr_0 = 1.6661e-04
Loss = 4.6852e-02, PNorm = 69.8456, GNorm = 1.1269, lr_0 = 1.6649e-04
Loss = 4.9379e-02, PNorm = 69.8489, GNorm = 0.5622, lr_0 = 1.6638e-04
Loss = 4.1997e-02, PNorm = 69.8524, GNorm = 0.7069, lr_0 = 1.6627e-04
Loss = 4.0012e-02, PNorm = 69.8559, GNorm = 0.3829, lr_0 = 1.6615e-04
Loss = 3.9438e-02, PNorm = 69.8568, GNorm = 0.3953, lr_0 = 1.6604e-04
Loss = 4.2290e-02, PNorm = 69.8585, GNorm = 0.7505, lr_0 = 1.6592e-04
Loss = 3.9922e-02, PNorm = 69.8613, GNorm = 0.5120, lr_0 = 1.6581e-04
Loss = 3.3370e-02, PNorm = 69.8648, GNorm = 0.4326, lr_0 = 1.6570e-04
Loss = 4.5587e-02, PNorm = 69.8679, GNorm = 0.5639, lr_0 = 1.6558e-04
Loss = 3.6507e-02, PNorm = 69.8700, GNorm = 0.3962, lr_0 = 1.6547e-04
Loss = 3.7994e-02, PNorm = 69.8704, GNorm = 0.4975, lr_0 = 1.6536e-04
Loss = 4.2128e-02, PNorm = 69.8718, GNorm = 0.5054, lr_0 = 1.6524e-04
Loss = 3.9805e-02, PNorm = 69.8741, GNorm = 0.4249, lr_0 = 1.6513e-04
Loss = 4.2160e-02, PNorm = 69.8767, GNorm = 0.5469, lr_0 = 1.6502e-04
Loss = 4.3705e-02, PNorm = 69.8805, GNorm = 0.5865, lr_0 = 1.6490e-04
Loss = 4.0335e-02, PNorm = 69.8832, GNorm = 0.5172, lr_0 = 1.6479e-04
Loss = 3.9341e-02, PNorm = 69.8856, GNorm = 0.3794, lr_0 = 1.6468e-04
Loss = 3.5909e-02, PNorm = 69.8884, GNorm = 0.7832, lr_0 = 1.6457e-04
Loss = 4.7854e-02, PNorm = 69.8905, GNorm = 0.5818, lr_0 = 1.6445e-04
Loss = 3.9518e-02, PNorm = 69.8928, GNorm = 0.4618, lr_0 = 1.6434e-04
Loss = 4.6107e-02, PNorm = 69.8944, GNorm = 0.4448, lr_0 = 1.6423e-04
Loss = 4.6592e-02, PNorm = 69.8956, GNorm = 0.4758, lr_0 = 1.6412e-04
Loss = 4.3361e-02, PNorm = 69.8983, GNorm = 0.7176, lr_0 = 1.6400e-04
Loss = 3.8360e-02, PNorm = 69.9004, GNorm = 0.5456, lr_0 = 1.6389e-04
Loss = 3.7413e-02, PNorm = 69.9030, GNorm = 0.4829, lr_0 = 1.6378e-04
Validation mae = 0.388656
Epoch 24
Loss = 3.5145e-02, PNorm = 69.9060, GNorm = 0.3405, lr_0 = 1.6367e-04
Loss = 3.3733e-02, PNorm = 69.9084, GNorm = 0.4842, lr_0 = 1.6355e-04
Loss = 3.1867e-02, PNorm = 69.9110, GNorm = 0.5096, lr_0 = 1.6344e-04
Loss = 3.0572e-02, PNorm = 69.9122, GNorm = 0.4063, lr_0 = 1.6333e-04
Loss = 3.4841e-02, PNorm = 69.9140, GNorm = 0.4478, lr_0 = 1.6322e-04
Loss = 4.1205e-02, PNorm = 69.9168, GNorm = 0.6724, lr_0 = 1.6311e-04
Loss = 3.8415e-02, PNorm = 69.9185, GNorm = 0.4263, lr_0 = 1.6299e-04
Loss = 2.9242e-02, PNorm = 69.9200, GNorm = 0.3989, lr_0 = 1.6288e-04
Loss = 3.7262e-02, PNorm = 69.9229, GNorm = 0.6439, lr_0 = 1.6277e-04
Loss = 3.0190e-02, PNorm = 69.9257, GNorm = 0.3828, lr_0 = 1.6266e-04
Loss = 3.3201e-02, PNorm = 69.9279, GNorm = 0.5471, lr_0 = 1.6255e-04
Loss = 3.7944e-02, PNorm = 69.9307, GNorm = 0.3921, lr_0 = 1.6244e-04
Loss = 3.7958e-02, PNorm = 69.9347, GNorm = 0.5501, lr_0 = 1.6233e-04
Loss = 3.2056e-02, PNorm = 69.9383, GNorm = 0.3286, lr_0 = 1.6221e-04
Loss = 4.1000e-02, PNorm = 69.9422, GNorm = 0.5172, lr_0 = 1.6210e-04
Loss = 3.9822e-02, PNorm = 69.9450, GNorm = 0.6861, lr_0 = 1.6199e-04
Loss = 3.2413e-02, PNorm = 69.9470, GNorm = 0.4141, lr_0 = 1.6188e-04
Loss = 4.0235e-02, PNorm = 69.9494, GNorm = 0.4659, lr_0 = 1.6177e-04
Loss = 3.6621e-02, PNorm = 69.9516, GNorm = 0.3966, lr_0 = 1.6166e-04
Loss = 3.4958e-02, PNorm = 69.9536, GNorm = 0.4829, lr_0 = 1.6155e-04
Loss = 3.2438e-02, PNorm = 69.9564, GNorm = 0.4768, lr_0 = 1.6144e-04
Loss = 3.3922e-02, PNorm = 69.9587, GNorm = 0.4409, lr_0 = 1.6133e-04
Loss = 3.1210e-02, PNorm = 69.9610, GNorm = 0.3779, lr_0 = 1.6122e-04
Loss = 3.6811e-02, PNorm = 69.9635, GNorm = 0.5507, lr_0 = 1.6111e-04
Loss = 3.1822e-02, PNorm = 69.9657, GNorm = 0.4088, lr_0 = 1.6100e-04
Loss = 4.3439e-02, PNorm = 69.9688, GNorm = 0.5448, lr_0 = 1.6089e-04
Loss = 4.2167e-02, PNorm = 69.9712, GNorm = 0.7539, lr_0 = 1.6078e-04
Loss = 3.7428e-02, PNorm = 69.9732, GNorm = 0.3913, lr_0 = 1.6067e-04
Loss = 3.7214e-02, PNorm = 69.9759, GNorm = 0.6936, lr_0 = 1.6056e-04
Loss = 3.6493e-02, PNorm = 69.9779, GNorm = 0.4082, lr_0 = 1.6045e-04
Loss = 3.6645e-02, PNorm = 69.9805, GNorm = 0.5226, lr_0 = 1.6034e-04
Loss = 4.4041e-02, PNorm = 69.9839, GNorm = 0.4132, lr_0 = 1.6023e-04
Loss = 3.3744e-02, PNorm = 69.9852, GNorm = 0.5203, lr_0 = 1.6012e-04
Loss = 3.1584e-02, PNorm = 69.9859, GNorm = 0.4063, lr_0 = 1.6001e-04
Loss = 3.9549e-02, PNorm = 69.9877, GNorm = 0.5455, lr_0 = 1.5990e-04
Loss = 3.5982e-02, PNorm = 69.9902, GNorm = 0.3802, lr_0 = 1.5979e-04
Loss = 3.9880e-02, PNorm = 69.9918, GNorm = 0.5604, lr_0 = 1.5968e-04
Loss = 3.7595e-02, PNorm = 69.9943, GNorm = 0.5619, lr_0 = 1.5957e-04
Loss = 3.0425e-02, PNorm = 69.9979, GNorm = 0.4331, lr_0 = 1.5946e-04
Loss = 4.1882e-02, PNorm = 70.0007, GNorm = 0.7155, lr_0 = 1.5935e-04
Loss = 3.3472e-02, PNorm = 70.0031, GNorm = 0.4945, lr_0 = 1.5924e-04
Loss = 3.9843e-02, PNorm = 70.0064, GNorm = 0.5664, lr_0 = 1.5913e-04
Loss = 3.7601e-02, PNorm = 70.0100, GNorm = 0.4561, lr_0 = 1.5902e-04
Loss = 3.8647e-02, PNorm = 70.0122, GNorm = 0.4404, lr_0 = 1.5891e-04
Loss = 4.1772e-02, PNorm = 70.0149, GNorm = 0.5397, lr_0 = 1.5880e-04
Loss = 4.0220e-02, PNorm = 70.0183, GNorm = 0.5670, lr_0 = 1.5870e-04
Loss = 3.8733e-02, PNorm = 70.0200, GNorm = 0.4911, lr_0 = 1.5859e-04
Loss = 3.8270e-02, PNorm = 70.0209, GNorm = 0.5827, lr_0 = 1.5848e-04
Loss = 4.1402e-02, PNorm = 70.0225, GNorm = 0.8253, lr_0 = 1.5837e-04
Loss = 3.9458e-02, PNorm = 70.0249, GNorm = 0.6294, lr_0 = 1.5826e-04
Loss = 3.8601e-02, PNorm = 70.0270, GNorm = 0.8396, lr_0 = 1.5815e-04
Loss = 4.2565e-02, PNorm = 70.0302, GNorm = 0.4980, lr_0 = 1.5804e-04
Loss = 3.8040e-02, PNorm = 70.0336, GNorm = 0.7124, lr_0 = 1.5794e-04
Loss = 3.3971e-02, PNorm = 70.0358, GNorm = 0.3762, lr_0 = 1.5783e-04
Loss = 3.3584e-02, PNorm = 70.0377, GNorm = 0.4546, lr_0 = 1.5772e-04
Loss = 3.5190e-02, PNorm = 70.0420, GNorm = 0.3934, lr_0 = 1.5761e-04
Loss = 3.7069e-02, PNorm = 70.0446, GNorm = 0.3200, lr_0 = 1.5750e-04
Loss = 3.5740e-02, PNorm = 70.0475, GNorm = 0.4297, lr_0 = 1.5740e-04
Loss = 3.8547e-02, PNorm = 70.0503, GNorm = 0.6286, lr_0 = 1.5729e-04
Loss = 4.2458e-02, PNorm = 70.0522, GNorm = 0.6678, lr_0 = 1.5718e-04
Loss = 3.9014e-02, PNorm = 70.0548, GNorm = 0.4368, lr_0 = 1.5707e-04
Loss = 3.8410e-02, PNorm = 70.0566, GNorm = 0.4191, lr_0 = 1.5697e-04
Loss = 3.5678e-02, PNorm = 70.0585, GNorm = 0.3800, lr_0 = 1.5686e-04
Loss = 3.9145e-02, PNorm = 70.0608, GNorm = 0.7623, lr_0 = 1.5675e-04
Loss = 3.6514e-02, PNorm = 70.0622, GNorm = 0.5333, lr_0 = 1.5664e-04
Loss = 3.6309e-02, PNorm = 70.0639, GNorm = 0.5198, lr_0 = 1.5654e-04
Loss = 4.1714e-02, PNorm = 70.0664, GNorm = 0.4754, lr_0 = 1.5643e-04
Loss = 3.8598e-02, PNorm = 70.0685, GNorm = 0.4509, lr_0 = 1.5632e-04
Loss = 3.8100e-02, PNorm = 70.0724, GNorm = 0.4432, lr_0 = 1.5621e-04
Loss = 3.8490e-02, PNorm = 70.0739, GNorm = 0.4914, lr_0 = 1.5611e-04
Loss = 3.4914e-02, PNorm = 70.0747, GNorm = 0.8996, lr_0 = 1.5600e-04
Loss = 3.2998e-02, PNorm = 70.0761, GNorm = 0.5905, lr_0 = 1.5589e-04
Loss = 4.6973e-02, PNorm = 70.0768, GNorm = 0.7652, lr_0 = 1.5579e-04
Loss = 3.5494e-02, PNorm = 70.0789, GNorm = 0.4707, lr_0 = 1.5568e-04
Loss = 3.8139e-02, PNorm = 70.0810, GNorm = 0.7472, lr_0 = 1.5557e-04
Loss = 3.8470e-02, PNorm = 70.0832, GNorm = 0.5052, lr_0 = 1.5547e-04
Loss = 3.9529e-02, PNorm = 70.0853, GNorm = 0.6986, lr_0 = 1.5536e-04
Loss = 3.7120e-02, PNorm = 70.0881, GNorm = 0.6220, lr_0 = 1.5525e-04
Loss = 3.6346e-02, PNorm = 70.0894, GNorm = 0.4389, lr_0 = 1.5515e-04
Loss = 3.7005e-02, PNorm = 70.0908, GNorm = 0.4478, lr_0 = 1.5504e-04
Loss = 3.3234e-02, PNorm = 70.0945, GNorm = 0.5211, lr_0 = 1.5493e-04
Loss = 4.1246e-02, PNorm = 70.0977, GNorm = 0.5108, lr_0 = 1.5483e-04
Loss = 4.3685e-02, PNorm = 70.1012, GNorm = 0.5165, lr_0 = 1.5472e-04
Loss = 3.9379e-02, PNorm = 70.1035, GNorm = 0.5369, lr_0 = 1.5462e-04
Loss = 4.0311e-02, PNorm = 70.1045, GNorm = 0.5180, lr_0 = 1.5451e-04
Loss = 3.7559e-02, PNorm = 70.1059, GNorm = 0.5789, lr_0 = 1.5440e-04
Loss = 4.3972e-02, PNorm = 70.1062, GNorm = 0.5695, lr_0 = 1.5430e-04
Loss = 4.2828e-02, PNorm = 70.1083, GNorm = 0.5617, lr_0 = 1.5419e-04
Loss = 3.5205e-02, PNorm = 70.1108, GNorm = 0.4572, lr_0 = 1.5409e-04
Loss = 4.5053e-02, PNorm = 70.1129, GNorm = 0.6983, lr_0 = 1.5398e-04
Loss = 4.1140e-02, PNorm = 70.1160, GNorm = 0.4725, lr_0 = 1.5388e-04
Loss = 4.1393e-02, PNorm = 70.1190, GNorm = 0.6309, lr_0 = 1.5377e-04
Loss = 4.0649e-02, PNorm = 70.1218, GNorm = 0.6040, lr_0 = 1.5367e-04
Loss = 3.6936e-02, PNorm = 70.1257, GNorm = 0.5289, lr_0 = 1.5356e-04
Loss = 4.0569e-02, PNorm = 70.1292, GNorm = 0.6429, lr_0 = 1.5346e-04
Loss = 3.3470e-02, PNorm = 70.1308, GNorm = 0.5791, lr_0 = 1.5335e-04
Loss = 3.7947e-02, PNorm = 70.1334, GNorm = 0.4332, lr_0 = 1.5325e-04
Loss = 3.5156e-02, PNorm = 70.1354, GNorm = 0.5256, lr_0 = 1.5314e-04
Loss = 4.2239e-02, PNorm = 70.1371, GNorm = 0.7666, lr_0 = 1.5304e-04
Loss = 3.8183e-02, PNorm = 70.1381, GNorm = 0.4925, lr_0 = 1.5293e-04
Loss = 3.8664e-02, PNorm = 70.1391, GNorm = 0.5510, lr_0 = 1.5283e-04
Loss = 3.7971e-02, PNorm = 70.1405, GNorm = 0.5249, lr_0 = 1.5272e-04
Loss = 4.0780e-02, PNorm = 70.1431, GNorm = 1.0245, lr_0 = 1.5262e-04
Loss = 3.9823e-02, PNorm = 70.1454, GNorm = 0.5027, lr_0 = 1.5251e-04
Loss = 4.0019e-02, PNorm = 70.1480, GNorm = 0.5024, lr_0 = 1.5241e-04
Loss = 4.0823e-02, PNorm = 70.1514, GNorm = 0.5796, lr_0 = 1.5230e-04
Loss = 3.7037e-02, PNorm = 70.1547, GNorm = 0.4586, lr_0 = 1.5220e-04
Loss = 3.2594e-02, PNorm = 70.1569, GNorm = 0.3935, lr_0 = 1.5209e-04
Loss = 4.4436e-02, PNorm = 70.1579, GNorm = 0.7117, lr_0 = 1.5199e-04
Loss = 3.4180e-02, PNorm = 70.1594, GNorm = 0.6624, lr_0 = 1.5189e-04
Loss = 3.9548e-02, PNorm = 70.1607, GNorm = 0.4765, lr_0 = 1.5178e-04
Loss = 4.2544e-02, PNorm = 70.1638, GNorm = 0.7181, lr_0 = 1.5168e-04
Loss = 4.2502e-02, PNorm = 70.1669, GNorm = 0.5061, lr_0 = 1.5157e-04
Loss = 3.3296e-02, PNorm = 70.1689, GNorm = 0.4350, lr_0 = 1.5147e-04
Loss = 4.1783e-02, PNorm = 70.1710, GNorm = 0.8643, lr_0 = 1.5137e-04
Loss = 3.4505e-02, PNorm = 70.1734, GNorm = 0.4646, lr_0 = 1.5126e-04
Loss = 3.6184e-02, PNorm = 70.1758, GNorm = 0.4536, lr_0 = 1.5116e-04
Loss = 3.7792e-02, PNorm = 70.1781, GNorm = 0.3292, lr_0 = 1.5106e-04
Loss = 3.6406e-02, PNorm = 70.1811, GNorm = 0.4812, lr_0 = 1.5095e-04
Loss = 4.0622e-02, PNorm = 70.1829, GNorm = 0.5828, lr_0 = 1.5085e-04
Validation mae = 0.386814
Epoch 25
Loss = 3.5253e-02, PNorm = 70.1851, GNorm = 0.4579, lr_0 = 1.5075e-04
Loss = 3.7582e-02, PNorm = 70.1873, GNorm = 0.5373, lr_0 = 1.5064e-04
Loss = 3.0716e-02, PNorm = 70.1884, GNorm = 0.3778, lr_0 = 1.5054e-04
Loss = 3.3760e-02, PNorm = 70.1898, GNorm = 0.4652, lr_0 = 1.5044e-04
Loss = 3.2783e-02, PNorm = 70.1919, GNorm = 0.4534, lr_0 = 1.5033e-04
Loss = 2.7873e-02, PNorm = 70.1935, GNorm = 0.5336, lr_0 = 1.5023e-04
Loss = 3.7141e-02, PNorm = 70.1954, GNorm = 0.4910, lr_0 = 1.5013e-04
Loss = 3.0361e-02, PNorm = 70.1977, GNorm = 0.3351, lr_0 = 1.5002e-04
Loss = 4.0408e-02, PNorm = 70.1992, GNorm = 1.0286, lr_0 = 1.4992e-04
Loss = 3.8759e-02, PNorm = 70.2013, GNorm = 0.4866, lr_0 = 1.4982e-04
Loss = 3.2054e-02, PNorm = 70.2029, GNorm = 0.4944, lr_0 = 1.4972e-04
Loss = 3.1590e-02, PNorm = 70.2039, GNorm = 0.6657, lr_0 = 1.4961e-04
Loss = 3.2248e-02, PNorm = 70.2063, GNorm = 0.4234, lr_0 = 1.4951e-04
Loss = 3.6895e-02, PNorm = 70.2089, GNorm = 0.5547, lr_0 = 1.4941e-04
Loss = 3.2563e-02, PNorm = 70.2110, GNorm = 0.5368, lr_0 = 1.4931e-04
Loss = 3.6491e-02, PNorm = 70.2122, GNorm = 0.5384, lr_0 = 1.4920e-04
Loss = 3.9072e-02, PNorm = 70.2144, GNorm = 0.7174, lr_0 = 1.4910e-04
Loss = 3.3624e-02, PNorm = 70.2155, GNorm = 0.3614, lr_0 = 1.4900e-04
Loss = 3.5264e-02, PNorm = 70.2178, GNorm = 0.4854, lr_0 = 1.4890e-04
Loss = 3.3387e-02, PNorm = 70.2205, GNorm = 0.4248, lr_0 = 1.4880e-04
Loss = 3.3998e-02, PNorm = 70.2232, GNorm = 0.3907, lr_0 = 1.4869e-04
Loss = 3.1639e-02, PNorm = 70.2256, GNorm = 0.4062, lr_0 = 1.4859e-04
Loss = 3.5513e-02, PNorm = 70.2265, GNorm = 0.4798, lr_0 = 1.4849e-04
Loss = 3.5559e-02, PNorm = 70.2275, GNorm = 0.4710, lr_0 = 1.4839e-04
Loss = 3.9724e-02, PNorm = 70.2296, GNorm = 0.5335, lr_0 = 1.4829e-04
Loss = 3.6348e-02, PNorm = 70.2316, GNorm = 0.6512, lr_0 = 1.4818e-04
Loss = 2.9555e-02, PNorm = 70.2330, GNorm = 0.4144, lr_0 = 1.4808e-04
Loss = 3.2080e-02, PNorm = 70.2349, GNorm = 0.4318, lr_0 = 1.4798e-04
Loss = 3.4686e-02, PNorm = 70.2360, GNorm = 0.5509, lr_0 = 1.4788e-04
Loss = 4.0553e-02, PNorm = 70.2385, GNorm = 0.4362, lr_0 = 1.4778e-04
Loss = 3.4626e-02, PNorm = 70.2407, GNorm = 0.5329, lr_0 = 1.4768e-04
Loss = 3.1724e-02, PNorm = 70.2430, GNorm = 0.3902, lr_0 = 1.4758e-04
Loss = 3.6731e-02, PNorm = 70.2449, GNorm = 0.7814, lr_0 = 1.4748e-04
Loss = 4.2747e-02, PNorm = 70.2490, GNorm = 0.4714, lr_0 = 1.4737e-04
Loss = 3.3569e-02, PNorm = 70.2523, GNorm = 0.5825, lr_0 = 1.4727e-04
Loss = 3.8863e-02, PNorm = 70.2552, GNorm = 0.6198, lr_0 = 1.4717e-04
Loss = 3.8482e-02, PNorm = 70.2574, GNorm = 0.6410, lr_0 = 1.4707e-04
Loss = 3.9454e-02, PNorm = 70.2604, GNorm = 0.5713, lr_0 = 1.4697e-04
Loss = 3.7967e-02, PNorm = 70.2631, GNorm = 0.5424, lr_0 = 1.4687e-04
Loss = 3.9219e-02, PNorm = 70.2649, GNorm = 0.4809, lr_0 = 1.4677e-04
Loss = 3.3355e-02, PNorm = 70.2665, GNorm = 0.5565, lr_0 = 1.4667e-04
Loss = 3.4567e-02, PNorm = 70.2687, GNorm = 0.3901, lr_0 = 1.4657e-04
Loss = 4.2704e-02, PNorm = 70.2714, GNorm = 0.6206, lr_0 = 1.4647e-04
Loss = 3.6519e-02, PNorm = 70.2744, GNorm = 0.5873, lr_0 = 1.4637e-04
Loss = 3.6408e-02, PNorm = 70.2777, GNorm = 0.5591, lr_0 = 1.4627e-04
Loss = 3.2746e-02, PNorm = 70.2801, GNorm = 0.3265, lr_0 = 1.4617e-04
Loss = 4.0957e-02, PNorm = 70.2833, GNorm = 0.5301, lr_0 = 1.4607e-04
Loss = 3.3819e-02, PNorm = 70.2853, GNorm = 0.4626, lr_0 = 1.4597e-04
Loss = 3.1192e-02, PNorm = 70.2876, GNorm = 0.3540, lr_0 = 1.4587e-04
Loss = 3.1360e-02, PNorm = 70.2903, GNorm = 0.3062, lr_0 = 1.4577e-04
Loss = 3.5751e-02, PNorm = 70.2923, GNorm = 0.4079, lr_0 = 1.4567e-04
Loss = 3.2983e-02, PNorm = 70.2944, GNorm = 0.4924, lr_0 = 1.4557e-04
Loss = 4.0985e-02, PNorm = 70.2961, GNorm = 0.6714, lr_0 = 1.4547e-04
Loss = 3.7692e-02, PNorm = 70.2981, GNorm = 0.7383, lr_0 = 1.4537e-04
Loss = 3.4218e-02, PNorm = 70.3012, GNorm = 0.3860, lr_0 = 1.4527e-04
Loss = 3.4502e-02, PNorm = 70.3046, GNorm = 0.3986, lr_0 = 1.4517e-04
Loss = 3.3630e-02, PNorm = 70.3068, GNorm = 0.6025, lr_0 = 1.4507e-04
Loss = 3.8365e-02, PNorm = 70.3091, GNorm = 0.5819, lr_0 = 1.4497e-04
Loss = 3.6391e-02, PNorm = 70.3115, GNorm = 0.4602, lr_0 = 1.4487e-04
Loss = 2.9409e-02, PNorm = 70.3142, GNorm = 0.4368, lr_0 = 1.4477e-04
Loss = 3.2631e-02, PNorm = 70.3153, GNorm = 0.4206, lr_0 = 1.4467e-04
Loss = 3.5572e-02, PNorm = 70.3164, GNorm = 0.4842, lr_0 = 1.4457e-04
Loss = 3.7402e-02, PNorm = 70.3170, GNorm = 0.5486, lr_0 = 1.4447e-04
Loss = 3.5805e-02, PNorm = 70.3190, GNorm = 0.3907, lr_0 = 1.4438e-04
Loss = 3.7078e-02, PNorm = 70.3219, GNorm = 0.5896, lr_0 = 1.4428e-04
Loss = 3.3227e-02, PNorm = 70.3256, GNorm = 0.4616, lr_0 = 1.4418e-04
Loss = 3.8103e-02, PNorm = 70.3287, GNorm = 0.4687, lr_0 = 1.4408e-04
Loss = 3.6641e-02, PNorm = 70.3306, GNorm = 0.6467, lr_0 = 1.4398e-04
Loss = 3.4829e-02, PNorm = 70.3322, GNorm = 0.4117, lr_0 = 1.4388e-04
Loss = 3.4618e-02, PNorm = 70.3352, GNorm = 0.4906, lr_0 = 1.4378e-04
Loss = 3.0250e-02, PNorm = 70.3368, GNorm = 0.4232, lr_0 = 1.4368e-04
Loss = 3.9357e-02, PNorm = 70.3386, GNorm = 0.6241, lr_0 = 1.4359e-04
Loss = 3.5624e-02, PNorm = 70.3408, GNorm = 0.4372, lr_0 = 1.4349e-04
Loss = 3.5080e-02, PNorm = 70.3429, GNorm = 0.5284, lr_0 = 1.4339e-04
Loss = 3.6547e-02, PNorm = 70.3456, GNorm = 0.6334, lr_0 = 1.4329e-04
Loss = 3.4721e-02, PNorm = 70.3477, GNorm = 0.4256, lr_0 = 1.4319e-04
Loss = 3.9747e-02, PNorm = 70.3491, GNorm = 0.7252, lr_0 = 1.4310e-04
Loss = 4.2746e-02, PNorm = 70.3530, GNorm = 0.3910, lr_0 = 1.4300e-04
Loss = 4.2866e-02, PNorm = 70.3558, GNorm = 0.4478, lr_0 = 1.4290e-04
Loss = 4.1825e-02, PNorm = 70.3569, GNorm = 0.5509, lr_0 = 1.4280e-04
Loss = 3.1049e-02, PNorm = 70.3592, GNorm = 0.7197, lr_0 = 1.4270e-04
Loss = 3.4785e-02, PNorm = 70.3614, GNorm = 0.6928, lr_0 = 1.4261e-04
Loss = 4.5889e-02, PNorm = 70.3628, GNorm = 0.5393, lr_0 = 1.4251e-04
Loss = 3.6836e-02, PNorm = 70.3645, GNorm = 0.6564, lr_0 = 1.4241e-04
Loss = 3.7793e-02, PNorm = 70.3662, GNorm = 0.5491, lr_0 = 1.4231e-04
Loss = 3.9507e-02, PNorm = 70.3684, GNorm = 0.4234, lr_0 = 1.4222e-04
Loss = 3.6995e-02, PNorm = 70.3709, GNorm = 0.4705, lr_0 = 1.4212e-04
Loss = 3.8514e-02, PNorm = 70.3728, GNorm = 0.5718, lr_0 = 1.4202e-04
Loss = 3.6083e-02, PNorm = 70.3749, GNorm = 0.6065, lr_0 = 1.4192e-04
Loss = 3.4359e-02, PNorm = 70.3758, GNorm = 0.3679, lr_0 = 1.4183e-04
Loss = 3.9420e-02, PNorm = 70.3781, GNorm = 0.5440, lr_0 = 1.4173e-04
Loss = 3.4028e-02, PNorm = 70.3803, GNorm = 0.4100, lr_0 = 1.4163e-04
Loss = 3.0894e-02, PNorm = 70.3830, GNorm = 0.3953, lr_0 = 1.4153e-04
Loss = 3.2129e-02, PNorm = 70.3848, GNorm = 0.4653, lr_0 = 1.4144e-04
Loss = 3.8912e-02, PNorm = 70.3863, GNorm = 0.8135, lr_0 = 1.4134e-04
Loss = 3.2362e-02, PNorm = 70.3898, GNorm = 0.4750, lr_0 = 1.4124e-04
Loss = 3.8966e-02, PNorm = 70.3912, GNorm = 0.5145, lr_0 = 1.4115e-04
Loss = 3.5833e-02, PNorm = 70.3921, GNorm = 0.4930, lr_0 = 1.4105e-04
Loss = 3.8828e-02, PNorm = 70.3937, GNorm = 0.6873, lr_0 = 1.4095e-04
Loss = 3.3275e-02, PNorm = 70.3955, GNorm = 0.4765, lr_0 = 1.4086e-04
Loss = 3.4728e-02, PNorm = 70.3971, GNorm = 0.5668, lr_0 = 1.4076e-04
Loss = 3.5235e-02, PNorm = 70.3976, GNorm = 0.5411, lr_0 = 1.4066e-04
Loss = 3.0069e-02, PNorm = 70.3980, GNorm = 0.4540, lr_0 = 1.4057e-04
Loss = 2.9732e-02, PNorm = 70.3986, GNorm = 0.3551, lr_0 = 1.4047e-04
Loss = 3.8459e-02, PNorm = 70.3995, GNorm = 0.5360, lr_0 = 1.4038e-04
Loss = 4.2212e-02, PNorm = 70.4016, GNorm = 0.4963, lr_0 = 1.4028e-04
Loss = 3.4997e-02, PNorm = 70.4041, GNorm = 0.4668, lr_0 = 1.4018e-04
Loss = 4.1397e-02, PNorm = 70.4056, GNorm = 0.4851, lr_0 = 1.4009e-04
Loss = 3.3451e-02, PNorm = 70.4077, GNorm = 1.1289, lr_0 = 1.3999e-04
Loss = 3.5773e-02, PNorm = 70.4095, GNorm = 0.4093, lr_0 = 1.3990e-04
Loss = 3.4060e-02, PNorm = 70.4120, GNorm = 0.6685, lr_0 = 1.3980e-04
Loss = 3.7199e-02, PNorm = 70.4131, GNorm = 0.4194, lr_0 = 1.3970e-04
Loss = 4.2163e-02, PNorm = 70.4136, GNorm = 0.5497, lr_0 = 1.3961e-04
Loss = 3.4725e-02, PNorm = 70.4144, GNorm = 0.6522, lr_0 = 1.3951e-04
Loss = 3.8970e-02, PNorm = 70.4155, GNorm = 0.4266, lr_0 = 1.3942e-04
Loss = 3.7867e-02, PNorm = 70.4164, GNorm = 0.3888, lr_0 = 1.3932e-04
Loss = 3.9402e-02, PNorm = 70.4181, GNorm = 0.6110, lr_0 = 1.3923e-04
Loss = 4.3005e-02, PNorm = 70.4200, GNorm = 0.4674, lr_0 = 1.3913e-04
Loss = 3.4085e-02, PNorm = 70.4226, GNorm = 0.3894, lr_0 = 1.3904e-04
Loss = 3.3690e-02, PNorm = 70.4244, GNorm = 0.6101, lr_0 = 1.3894e-04
Validation mae = 0.386843
Epoch 26
Loss = 2.9873e-02, PNorm = 70.4267, GNorm = 0.3658, lr_0 = 1.3884e-04
Loss = 3.0767e-02, PNorm = 70.4287, GNorm = 0.4150, lr_0 = 1.3875e-04
Loss = 3.5470e-02, PNorm = 70.4313, GNorm = 0.7545, lr_0 = 1.3865e-04
Loss = 3.0196e-02, PNorm = 70.4337, GNorm = 0.4968, lr_0 = 1.3856e-04
Loss = 3.2534e-02, PNorm = 70.4355, GNorm = 0.4714, lr_0 = 1.3846e-04
Loss = 3.1337e-02, PNorm = 70.4365, GNorm = 0.3964, lr_0 = 1.3837e-04
Loss = 2.8145e-02, PNorm = 70.4388, GNorm = 0.4046, lr_0 = 1.3828e-04
Loss = 3.1065e-02, PNorm = 70.4412, GNorm = 0.4404, lr_0 = 1.3818e-04
Loss = 3.3580e-02, PNorm = 70.4439, GNorm = 0.4365, lr_0 = 1.3809e-04
Loss = 3.1033e-02, PNorm = 70.4475, GNorm = 0.3817, lr_0 = 1.3799e-04
Loss = 3.7904e-02, PNorm = 70.4491, GNorm = 0.5646, lr_0 = 1.3790e-04
Loss = 2.6735e-02, PNorm = 70.4508, GNorm = 0.3966, lr_0 = 1.3780e-04
Loss = 3.7247e-02, PNorm = 70.4524, GNorm = 0.4569, lr_0 = 1.3771e-04
Loss = 3.3871e-02, PNorm = 70.4542, GNorm = 0.4908, lr_0 = 1.3761e-04
Loss = 3.4858e-02, PNorm = 70.4569, GNorm = 0.4832, lr_0 = 1.3752e-04
Loss = 3.6127e-02, PNorm = 70.4597, GNorm = 0.6320, lr_0 = 1.3742e-04
Loss = 3.2527e-02, PNorm = 70.4621, GNorm = 0.3936, lr_0 = 1.3733e-04
Loss = 3.1907e-02, PNorm = 70.4643, GNorm = 0.2980, lr_0 = 1.3724e-04
Loss = 3.1165e-02, PNorm = 70.4655, GNorm = 0.5425, lr_0 = 1.3714e-04
Loss = 3.8933e-02, PNorm = 70.4677, GNorm = 0.8066, lr_0 = 1.3705e-04
Loss = 3.4464e-02, PNorm = 70.4699, GNorm = 0.4469, lr_0 = 1.3695e-04
Loss = 3.1315e-02, PNorm = 70.4723, GNorm = 0.8213, lr_0 = 1.3686e-04
Loss = 3.0022e-02, PNorm = 70.4739, GNorm = 0.5534, lr_0 = 1.3677e-04
Loss = 3.5232e-02, PNorm = 70.4756, GNorm = 0.4728, lr_0 = 1.3667e-04
Loss = 2.9732e-02, PNorm = 70.4789, GNorm = 0.4718, lr_0 = 1.3658e-04
Loss = 3.3053e-02, PNorm = 70.4819, GNorm = 0.4262, lr_0 = 1.3649e-04
Loss = 3.0228e-02, PNorm = 70.4840, GNorm = 0.4084, lr_0 = 1.3639e-04
Loss = 3.7081e-02, PNorm = 70.4865, GNorm = 0.6756, lr_0 = 1.3630e-04
Loss = 3.0997e-02, PNorm = 70.4895, GNorm = 0.4331, lr_0 = 1.3621e-04
Loss = 3.3993e-02, PNorm = 70.4920, GNorm = 0.3222, lr_0 = 1.3611e-04
Loss = 3.6588e-02, PNorm = 70.4930, GNorm = 0.5503, lr_0 = 1.3602e-04
Loss = 4.0880e-02, PNorm = 70.4955, GNorm = 0.5593, lr_0 = 1.3593e-04
Loss = 3.1196e-02, PNorm = 70.4976, GNorm = 0.3756, lr_0 = 1.3583e-04
Loss = 3.2440e-02, PNorm = 70.4993, GNorm = 0.4158, lr_0 = 1.3574e-04
Loss = 3.9844e-02, PNorm = 70.5012, GNorm = 0.5939, lr_0 = 1.3565e-04
Loss = 3.0136e-02, PNorm = 70.5021, GNorm = 0.6616, lr_0 = 1.3555e-04
Loss = 3.5926e-02, PNorm = 70.5044, GNorm = 0.4378, lr_0 = 1.3546e-04
Loss = 3.2592e-02, PNorm = 70.5062, GNorm = 0.5766, lr_0 = 1.3537e-04
Loss = 2.9078e-02, PNorm = 70.5077, GNorm = 0.6539, lr_0 = 1.3528e-04
Loss = 3.1678e-02, PNorm = 70.5094, GNorm = 0.5615, lr_0 = 1.3518e-04
Loss = 3.8327e-02, PNorm = 70.5108, GNorm = 0.4048, lr_0 = 1.3509e-04
Loss = 3.7012e-02, PNorm = 70.5130, GNorm = 0.6210, lr_0 = 1.3500e-04
Loss = 3.5525e-02, PNorm = 70.5155, GNorm = 0.6757, lr_0 = 1.3491e-04
Loss = 3.0806e-02, PNorm = 70.5179, GNorm = 0.3850, lr_0 = 1.3481e-04
Loss = 2.9892e-02, PNorm = 70.5202, GNorm = 0.4502, lr_0 = 1.3472e-04
Loss = 3.9499e-02, PNorm = 70.5233, GNorm = 0.8283, lr_0 = 1.3463e-04
Loss = 3.3042e-02, PNorm = 70.5266, GNorm = 0.3961, lr_0 = 1.3454e-04
Loss = 2.7964e-02, PNorm = 70.5294, GNorm = 0.4359, lr_0 = 1.3444e-04
Loss = 3.5406e-02, PNorm = 70.5314, GNorm = 0.7706, lr_0 = 1.3435e-04
Loss = 3.2961e-02, PNorm = 70.5338, GNorm = 0.4157, lr_0 = 1.3426e-04
Loss = 3.1919e-02, PNorm = 70.5361, GNorm = 0.6069, lr_0 = 1.3417e-04
Loss = 3.2856e-02, PNorm = 70.5376, GNorm = 0.5869, lr_0 = 1.3408e-04
Loss = 2.9859e-02, PNorm = 70.5385, GNorm = 0.5503, lr_0 = 1.3398e-04
Loss = 4.0992e-02, PNorm = 70.5408, GNorm = 0.4523, lr_0 = 1.3389e-04
Loss = 3.2510e-02, PNorm = 70.5421, GNorm = 0.3820, lr_0 = 1.3380e-04
Loss = 3.2387e-02, PNorm = 70.5438, GNorm = 0.4228, lr_0 = 1.3371e-04
Loss = 2.9577e-02, PNorm = 70.5457, GNorm = 0.4406, lr_0 = 1.3362e-04
Loss = 2.8313e-02, PNorm = 70.5480, GNorm = 0.4232, lr_0 = 1.3353e-04
Loss = 3.5626e-02, PNorm = 70.5497, GNorm = 0.4609, lr_0 = 1.3343e-04
Loss = 3.2414e-02, PNorm = 70.5514, GNorm = 0.5126, lr_0 = 1.3334e-04
Loss = 3.8462e-02, PNorm = 70.5530, GNorm = 0.6541, lr_0 = 1.3325e-04
Loss = 4.0098e-02, PNorm = 70.5543, GNorm = 0.3304, lr_0 = 1.3316e-04
Loss = 3.8085e-02, PNorm = 70.5557, GNorm = 0.4818, lr_0 = 1.3307e-04
Loss = 2.7523e-02, PNorm = 70.5567, GNorm = 0.4474, lr_0 = 1.3298e-04
Loss = 3.5709e-02, PNorm = 70.5580, GNorm = 0.4759, lr_0 = 1.3289e-04
Loss = 3.2478e-02, PNorm = 70.5601, GNorm = 0.6060, lr_0 = 1.3280e-04
Loss = 3.7721e-02, PNorm = 70.5615, GNorm = 0.5796, lr_0 = 1.3270e-04
Loss = 3.4081e-02, PNorm = 70.5633, GNorm = 0.4047, lr_0 = 1.3261e-04
Loss = 4.2101e-02, PNorm = 70.5651, GNorm = 0.4762, lr_0 = 1.3252e-04
Loss = 3.4977e-02, PNorm = 70.5673, GNorm = 0.4145, lr_0 = 1.3243e-04
Loss = 3.2376e-02, PNorm = 70.5691, GNorm = 0.3719, lr_0 = 1.3234e-04
Loss = 3.4745e-02, PNorm = 70.5710, GNorm = 0.8973, lr_0 = 1.3225e-04
Loss = 3.2890e-02, PNorm = 70.5736, GNorm = 0.4256, lr_0 = 1.3216e-04
Loss = 3.0879e-02, PNorm = 70.5758, GNorm = 0.3762, lr_0 = 1.3207e-04
Loss = 3.7287e-02, PNorm = 70.5770, GNorm = 0.3955, lr_0 = 1.3198e-04
Loss = 3.5687e-02, PNorm = 70.5781, GNorm = 0.6110, lr_0 = 1.3189e-04
Loss = 3.4031e-02, PNorm = 70.5797, GNorm = 0.4781, lr_0 = 1.3180e-04
Loss = 3.0342e-02, PNorm = 70.5823, GNorm = 0.7088, lr_0 = 1.3171e-04
Loss = 3.4400e-02, PNorm = 70.5830, GNorm = 0.4058, lr_0 = 1.3162e-04
Loss = 3.2922e-02, PNorm = 70.5836, GNorm = 0.5560, lr_0 = 1.3153e-04
Loss = 3.0225e-02, PNorm = 70.5848, GNorm = 0.6055, lr_0 = 1.3144e-04
Loss = 2.8656e-02, PNorm = 70.5855, GNorm = 0.4236, lr_0 = 1.3135e-04
Loss = 3.3073e-02, PNorm = 70.5866, GNorm = 0.4744, lr_0 = 1.3126e-04
Loss = 3.1893e-02, PNorm = 70.5875, GNorm = 0.5066, lr_0 = 1.3117e-04
Loss = 3.0677e-02, PNorm = 70.5892, GNorm = 0.4662, lr_0 = 1.3108e-04
Loss = 4.0163e-02, PNorm = 70.5902, GNorm = 0.4272, lr_0 = 1.3099e-04
Loss = 2.8626e-02, PNorm = 70.5923, GNorm = 0.5585, lr_0 = 1.3090e-04
Loss = 3.6380e-02, PNorm = 70.5946, GNorm = 0.3253, lr_0 = 1.3081e-04
Loss = 3.8685e-02, PNorm = 70.5970, GNorm = 0.5174, lr_0 = 1.3072e-04
Loss = 3.8229e-02, PNorm = 70.5998, GNorm = 0.5192, lr_0 = 1.3063e-04
Loss = 3.4091e-02, PNorm = 70.6019, GNorm = 0.4810, lr_0 = 1.3054e-04
Loss = 3.3520e-02, PNorm = 70.6036, GNorm = 0.4906, lr_0 = 1.3045e-04
Loss = 3.0781e-02, PNorm = 70.6063, GNorm = 0.4058, lr_0 = 1.3036e-04
Loss = 3.7889e-02, PNorm = 70.6086, GNorm = 0.5997, lr_0 = 1.3027e-04
Loss = 3.3157e-02, PNorm = 70.6106, GNorm = 0.5258, lr_0 = 1.3018e-04
Loss = 3.1249e-02, PNorm = 70.6119, GNorm = 0.5312, lr_0 = 1.3009e-04
Loss = 3.6016e-02, PNorm = 70.6121, GNorm = 0.5917, lr_0 = 1.3000e-04
Loss = 3.7181e-02, PNorm = 70.6141, GNorm = 0.3860, lr_0 = 1.2992e-04
Loss = 3.8571e-02, PNorm = 70.6163, GNorm = 0.3850, lr_0 = 1.2983e-04
Loss = 3.5205e-02, PNorm = 70.6183, GNorm = 0.4339, lr_0 = 1.2974e-04
Loss = 3.8910e-02, PNorm = 70.6186, GNorm = 0.4558, lr_0 = 1.2965e-04
Loss = 3.7118e-02, PNorm = 70.6197, GNorm = 0.5841, lr_0 = 1.2956e-04
Loss = 3.1665e-02, PNorm = 70.6208, GNorm = 0.5660, lr_0 = 1.2947e-04
Loss = 3.6828e-02, PNorm = 70.6221, GNorm = 0.5089, lr_0 = 1.2938e-04
Loss = 3.5259e-02, PNorm = 70.6242, GNorm = 0.4882, lr_0 = 1.2929e-04
Loss = 3.6027e-02, PNorm = 70.6263, GNorm = 0.3992, lr_0 = 1.2921e-04
Loss = 3.8612e-02, PNorm = 70.6288, GNorm = 0.4019, lr_0 = 1.2912e-04
Loss = 3.5234e-02, PNorm = 70.6306, GNorm = 0.4440, lr_0 = 1.2903e-04
Loss = 3.9237e-02, PNorm = 70.6313, GNorm = 0.6912, lr_0 = 1.2894e-04
Loss = 3.1662e-02, PNorm = 70.6327, GNorm = 0.4962, lr_0 = 1.2885e-04
Loss = 3.7940e-02, PNorm = 70.6337, GNorm = 0.3888, lr_0 = 1.2876e-04
Loss = 3.6463e-02, PNorm = 70.6349, GNorm = 0.5357, lr_0 = 1.2867e-04
Loss = 3.8562e-02, PNorm = 70.6375, GNorm = 0.4508, lr_0 = 1.2859e-04
Loss = 3.3981e-02, PNorm = 70.6392, GNorm = 0.4614, lr_0 = 1.2850e-04
Loss = 3.5370e-02, PNorm = 70.6405, GNorm = 0.4243, lr_0 = 1.2841e-04
Loss = 3.1809e-02, PNorm = 70.6417, GNorm = 0.5803, lr_0 = 1.2832e-04
Loss = 3.2915e-02, PNorm = 70.6424, GNorm = 0.5378, lr_0 = 1.2823e-04
Loss = 3.4716e-02, PNorm = 70.6442, GNorm = 0.7749, lr_0 = 1.2815e-04
Loss = 3.6253e-02, PNorm = 70.6463, GNorm = 0.3912, lr_0 = 1.2806e-04
Loss = 3.7544e-02, PNorm = 70.6490, GNorm = 0.6117, lr_0 = 1.2797e-04
Validation mae = 0.391123
Epoch 27
Loss = 2.4676e-02, PNorm = 70.6502, GNorm = 0.6407, lr_0 = 1.2788e-04
Loss = 3.0995e-02, PNorm = 70.6518, GNorm = 0.4399, lr_0 = 1.2780e-04
Loss = 2.7494e-02, PNorm = 70.6541, GNorm = 0.4586, lr_0 = 1.2771e-04
Loss = 2.9996e-02, PNorm = 70.6569, GNorm = 0.3497, lr_0 = 1.2762e-04
Loss = 3.0531e-02, PNorm = 70.6584, GNorm = 0.4253, lr_0 = 1.2753e-04
Loss = 2.6532e-02, PNorm = 70.6594, GNorm = 0.3687, lr_0 = 1.2745e-04
Loss = 2.6426e-02, PNorm = 70.6609, GNorm = 0.3923, lr_0 = 1.2736e-04
Loss = 2.6713e-02, PNorm = 70.6620, GNorm = 0.5128, lr_0 = 1.2727e-04
Loss = 3.2650e-02, PNorm = 70.6640, GNorm = 0.3635, lr_0 = 1.2718e-04
Loss = 2.9237e-02, PNorm = 70.6655, GNorm = 0.3556, lr_0 = 1.2710e-04
Loss = 3.0746e-02, PNorm = 70.6665, GNorm = 0.4020, lr_0 = 1.2701e-04
Loss = 2.8957e-02, PNorm = 70.6682, GNorm = 0.5829, lr_0 = 1.2692e-04
Loss = 3.1749e-02, PNorm = 70.6705, GNorm = 0.3691, lr_0 = 1.2684e-04
Loss = 3.2735e-02, PNorm = 70.6735, GNorm = 0.4336, lr_0 = 1.2675e-04
Loss = 2.5432e-02, PNorm = 70.6764, GNorm = 0.5129, lr_0 = 1.2666e-04
Loss = 2.8507e-02, PNorm = 70.6780, GNorm = 0.5324, lr_0 = 1.2658e-04
Loss = 2.8477e-02, PNorm = 70.6785, GNorm = 0.5058, lr_0 = 1.2649e-04
Loss = 3.1754e-02, PNorm = 70.6790, GNorm = 0.6132, lr_0 = 1.2640e-04
Loss = 3.5683e-02, PNorm = 70.6810, GNorm = 0.5487, lr_0 = 1.2632e-04
Loss = 2.9636e-02, PNorm = 70.6835, GNorm = 0.3518, lr_0 = 1.2623e-04
Loss = 3.3171e-02, PNorm = 70.6842, GNorm = 0.5080, lr_0 = 1.2614e-04
Loss = 3.7590e-02, PNorm = 70.6862, GNorm = 0.5652, lr_0 = 1.2606e-04
Loss = 3.4805e-02, PNorm = 70.6881, GNorm = 0.3747, lr_0 = 1.2597e-04
Loss = 3.8150e-02, PNorm = 70.6892, GNorm = 0.5831, lr_0 = 1.2588e-04
Loss = 3.1370e-02, PNorm = 70.6905, GNorm = 0.4374, lr_0 = 1.2580e-04
Loss = 3.0975e-02, PNorm = 70.6927, GNorm = 0.7428, lr_0 = 1.2571e-04
Loss = 3.5219e-02, PNorm = 70.6949, GNorm = 0.4197, lr_0 = 1.2563e-04
Loss = 3.5517e-02, PNorm = 70.6965, GNorm = 0.3914, lr_0 = 1.2554e-04
Loss = 3.3127e-02, PNorm = 70.6986, GNorm = 0.6439, lr_0 = 1.2545e-04
Loss = 3.3706e-02, PNorm = 70.7012, GNorm = 0.4977, lr_0 = 1.2537e-04
Loss = 2.9089e-02, PNorm = 70.7036, GNorm = 0.4397, lr_0 = 1.2528e-04
Loss = 3.0129e-02, PNorm = 70.7049, GNorm = 0.7156, lr_0 = 1.2520e-04
Loss = 2.6368e-02, PNorm = 70.7072, GNorm = 0.3698, lr_0 = 1.2511e-04
Loss = 2.9414e-02, PNorm = 70.7099, GNorm = 0.3716, lr_0 = 1.2502e-04
Loss = 3.5549e-02, PNorm = 70.7115, GNorm = 0.5621, lr_0 = 1.2494e-04
Loss = 3.1592e-02, PNorm = 70.7134, GNorm = 0.6018, lr_0 = 1.2485e-04
Loss = 3.4447e-02, PNorm = 70.7140, GNorm = 0.5878, lr_0 = 1.2477e-04
Loss = 3.2463e-02, PNorm = 70.7157, GNorm = 0.4473, lr_0 = 1.2468e-04
Loss = 3.3005e-02, PNorm = 70.7181, GNorm = 0.5300, lr_0 = 1.2460e-04
Loss = 3.3343e-02, PNorm = 70.7202, GNorm = 0.4589, lr_0 = 1.2451e-04
Loss = 3.0357e-02, PNorm = 70.7216, GNorm = 0.4642, lr_0 = 1.2443e-04
Loss = 3.2869e-02, PNorm = 70.7234, GNorm = 0.5106, lr_0 = 1.2434e-04
Loss = 3.3805e-02, PNorm = 70.7258, GNorm = 0.5429, lr_0 = 1.2426e-04
Loss = 3.3448e-02, PNorm = 70.7279, GNorm = 0.4675, lr_0 = 1.2417e-04
Loss = 3.1103e-02, PNorm = 70.7296, GNorm = 0.6718, lr_0 = 1.2409e-04
Loss = 3.2145e-02, PNorm = 70.7315, GNorm = 0.5204, lr_0 = 1.2400e-04
Loss = 3.1675e-02, PNorm = 70.7340, GNorm = 0.4600, lr_0 = 1.2392e-04
Loss = 2.9223e-02, PNorm = 70.7366, GNorm = 0.6301, lr_0 = 1.2383e-04
Loss = 3.2311e-02, PNorm = 70.7379, GNorm = 0.6197, lr_0 = 1.2375e-04
Loss = 3.3007e-02, PNorm = 70.7398, GNorm = 0.3946, lr_0 = 1.2366e-04
Loss = 2.9452e-02, PNorm = 70.7413, GNorm = 0.3588, lr_0 = 1.2358e-04
Loss = 3.0976e-02, PNorm = 70.7425, GNorm = 0.4432, lr_0 = 1.2349e-04
Loss = 3.6039e-02, PNorm = 70.7443, GNorm = 0.4018, lr_0 = 1.2341e-04
Loss = 3.8012e-02, PNorm = 70.7458, GNorm = 0.6225, lr_0 = 1.2332e-04
Loss = 3.1805e-02, PNorm = 70.7474, GNorm = 0.5043, lr_0 = 1.2324e-04
Loss = 2.8123e-02, PNorm = 70.7492, GNorm = 0.3468, lr_0 = 1.2315e-04
Loss = 3.6064e-02, PNorm = 70.7504, GNorm = 0.6397, lr_0 = 1.2307e-04
Loss = 3.7183e-02, PNorm = 70.7535, GNorm = 0.4756, lr_0 = 1.2298e-04
Loss = 3.4519e-02, PNorm = 70.7558, GNorm = 0.6021, lr_0 = 1.2290e-04
Loss = 3.8268e-02, PNorm = 70.7582, GNorm = 0.6430, lr_0 = 1.2282e-04
Loss = 3.1732e-02, PNorm = 70.7601, GNorm = 0.5797, lr_0 = 1.2273e-04
Loss = 3.4960e-02, PNorm = 70.7617, GNorm = 0.5249, lr_0 = 1.2265e-04
Loss = 3.3620e-02, PNorm = 70.7627, GNorm = 0.5148, lr_0 = 1.2256e-04
Loss = 2.9500e-02, PNorm = 70.7631, GNorm = 0.3806, lr_0 = 1.2248e-04
Loss = 2.8099e-02, PNorm = 70.7645, GNorm = 0.4458, lr_0 = 1.2240e-04
Loss = 3.7566e-02, PNorm = 70.7662, GNorm = 0.4597, lr_0 = 1.2231e-04
Loss = 3.0424e-02, PNorm = 70.7680, GNorm = 0.4912, lr_0 = 1.2223e-04
Loss = 3.4211e-02, PNorm = 70.7690, GNorm = 0.5006, lr_0 = 1.2214e-04
Loss = 3.9798e-02, PNorm = 70.7693, GNorm = 0.5872, lr_0 = 1.2206e-04
Loss = 3.7687e-02, PNorm = 70.7709, GNorm = 0.5437, lr_0 = 1.2198e-04
Loss = 2.7431e-02, PNorm = 70.7731, GNorm = 0.5711, lr_0 = 1.2189e-04
Loss = 3.4896e-02, PNorm = 70.7762, GNorm = 0.5589, lr_0 = 1.2181e-04
Loss = 2.9966e-02, PNorm = 70.7779, GNorm = 0.4473, lr_0 = 1.2173e-04
Loss = 3.0528e-02, PNorm = 70.7796, GNorm = 0.4825, lr_0 = 1.2164e-04
Loss = 3.5243e-02, PNorm = 70.7813, GNorm = 1.0092, lr_0 = 1.2156e-04
Loss = 3.2142e-02, PNorm = 70.7835, GNorm = 0.6010, lr_0 = 1.2148e-04
Loss = 3.7132e-02, PNorm = 70.7855, GNorm = 0.5291, lr_0 = 1.2139e-04
Loss = 3.0528e-02, PNorm = 70.7878, GNorm = 0.4656, lr_0 = 1.2131e-04
Loss = 3.3307e-02, PNorm = 70.7893, GNorm = 0.5483, lr_0 = 1.2123e-04
Loss = 3.4830e-02, PNorm = 70.7902, GNorm = 0.5303, lr_0 = 1.2114e-04
Loss = 3.2474e-02, PNorm = 70.7920, GNorm = 0.5944, lr_0 = 1.2106e-04
Loss = 3.4904e-02, PNorm = 70.7936, GNorm = 0.6701, lr_0 = 1.2098e-04
Loss = 3.8850e-02, PNorm = 70.7951, GNorm = 0.6026, lr_0 = 1.2090e-04
Loss = 3.3652e-02, PNorm = 70.7976, GNorm = 0.5555, lr_0 = 1.2081e-04
Loss = 3.0768e-02, PNorm = 70.7988, GNorm = 0.5956, lr_0 = 1.2073e-04
Loss = 3.3454e-02, PNorm = 70.7997, GNorm = 0.3591, lr_0 = 1.2065e-04
Loss = 3.8590e-02, PNorm = 70.8019, GNorm = 0.4291, lr_0 = 1.2056e-04
Loss = 3.1862e-02, PNorm = 70.8049, GNorm = 0.4770, lr_0 = 1.2048e-04
Loss = 3.2517e-02, PNorm = 70.8072, GNorm = 0.4735, lr_0 = 1.2040e-04
Loss = 3.7935e-02, PNorm = 70.8079, GNorm = 0.4500, lr_0 = 1.2032e-04
Loss = 4.0226e-02, PNorm = 70.8093, GNorm = 0.4424, lr_0 = 1.2023e-04
Loss = 2.9636e-02, PNorm = 70.8113, GNorm = 0.5709, lr_0 = 1.2015e-04
Loss = 3.4795e-02, PNorm = 70.8131, GNorm = 0.4553, lr_0 = 1.2007e-04
Loss = 3.8498e-02, PNorm = 70.8142, GNorm = 0.6184, lr_0 = 1.1999e-04
Loss = 3.3115e-02, PNorm = 70.8155, GNorm = 0.6590, lr_0 = 1.1991e-04
Loss = 3.1067e-02, PNorm = 70.8165, GNorm = 0.3683, lr_0 = 1.1982e-04
Loss = 3.6962e-02, PNorm = 70.8181, GNorm = 0.4425, lr_0 = 1.1974e-04
Loss = 3.5327e-02, PNorm = 70.8204, GNorm = 0.5984, lr_0 = 1.1966e-04
Loss = 3.1586e-02, PNorm = 70.8215, GNorm = 0.4828, lr_0 = 1.1958e-04
Loss = 2.8556e-02, PNorm = 70.8222, GNorm = 0.4042, lr_0 = 1.1950e-04
Loss = 3.4468e-02, PNorm = 70.8236, GNorm = 0.5256, lr_0 = 1.1941e-04
Loss = 3.1145e-02, PNorm = 70.8251, GNorm = 0.3875, lr_0 = 1.1933e-04
Loss = 3.4890e-02, PNorm = 70.8279, GNorm = 0.4826, lr_0 = 1.1925e-04
Loss = 2.9400e-02, PNorm = 70.8302, GNorm = 0.5311, lr_0 = 1.1917e-04
Loss = 3.7665e-02, PNorm = 70.8314, GNorm = 0.5499, lr_0 = 1.1909e-04
Loss = 2.8274e-02, PNorm = 70.8332, GNorm = 0.3984, lr_0 = 1.1901e-04
Loss = 3.3979e-02, PNorm = 70.8345, GNorm = 0.6204, lr_0 = 1.1892e-04
Loss = 3.2032e-02, PNorm = 70.8359, GNorm = 0.4652, lr_0 = 1.1884e-04
Loss = 2.7628e-02, PNorm = 70.8369, GNorm = 0.4848, lr_0 = 1.1876e-04
Loss = 3.5119e-02, PNorm = 70.8387, GNorm = 0.4368, lr_0 = 1.1868e-04
Loss = 3.4770e-02, PNorm = 70.8408, GNorm = 0.7024, lr_0 = 1.1860e-04
Loss = 3.3251e-02, PNorm = 70.8417, GNorm = 0.4496, lr_0 = 1.1852e-04
Loss = 3.2496e-02, PNorm = 70.8440, GNorm = 0.4666, lr_0 = 1.1844e-04
Loss = 2.9842e-02, PNorm = 70.8451, GNorm = 0.4935, lr_0 = 1.1835e-04
Loss = 3.1759e-02, PNorm = 70.8468, GNorm = 0.5277, lr_0 = 1.1827e-04
Loss = 3.1302e-02, PNorm = 70.8484, GNorm = 0.6581, lr_0 = 1.1819e-04
Loss = 3.0853e-02, PNorm = 70.8485, GNorm = 0.4428, lr_0 = 1.1811e-04
Loss = 3.1304e-02, PNorm = 70.8491, GNorm = 0.4828, lr_0 = 1.1803e-04
Loss = 3.1794e-02, PNorm = 70.8505, GNorm = 0.4893, lr_0 = 1.1795e-04
Loss = 3.6074e-02, PNorm = 70.8524, GNorm = 0.4845, lr_0 = 1.1787e-04
Validation mae = 0.388063
Epoch 28
Loss = 2.8406e-02, PNorm = 70.8548, GNorm = 0.4925, lr_0 = 1.1779e-04
Loss = 2.5050e-02, PNorm = 70.8568, GNorm = 0.4383, lr_0 = 1.1771e-04
Loss = 3.0555e-02, PNorm = 70.8588, GNorm = 0.6180, lr_0 = 1.1763e-04
Loss = 2.7550e-02, PNorm = 70.8605, GNorm = 0.3865, lr_0 = 1.1755e-04
Loss = 2.7416e-02, PNorm = 70.8623, GNorm = 0.3650, lr_0 = 1.1747e-04
Loss = 2.5795e-02, PNorm = 70.8639, GNorm = 0.4644, lr_0 = 1.1739e-04
Loss = 3.0398e-02, PNorm = 70.8665, GNorm = 0.5897, lr_0 = 1.1730e-04
Loss = 2.9267e-02, PNorm = 70.8691, GNorm = 0.4682, lr_0 = 1.1722e-04
Loss = 2.5710e-02, PNorm = 70.8701, GNorm = 0.5517, lr_0 = 1.1714e-04
Loss = 3.1177e-02, PNorm = 70.8721, GNorm = 0.4096, lr_0 = 1.1706e-04
Loss = 2.6907e-02, PNorm = 70.8732, GNorm = 0.5390, lr_0 = 1.1698e-04
Loss = 3.0892e-02, PNorm = 70.8736, GNorm = 0.3350, lr_0 = 1.1690e-04
Loss = 2.6206e-02, PNorm = 70.8747, GNorm = 0.4109, lr_0 = 1.1682e-04
Loss = 2.9189e-02, PNorm = 70.8772, GNorm = 0.4842, lr_0 = 1.1674e-04
Loss = 2.8618e-02, PNorm = 70.8796, GNorm = 0.5360, lr_0 = 1.1666e-04
Loss = 2.8621e-02, PNorm = 70.8815, GNorm = 0.3820, lr_0 = 1.1658e-04
Loss = 2.5395e-02, PNorm = 70.8834, GNorm = 0.3682, lr_0 = 1.1650e-04
Loss = 2.8666e-02, PNorm = 70.8852, GNorm = 0.4650, lr_0 = 1.1642e-04
Loss = 3.0577e-02, PNorm = 70.8879, GNorm = 0.4848, lr_0 = 1.1634e-04
Loss = 3.0074e-02, PNorm = 70.8906, GNorm = 0.4810, lr_0 = 1.1626e-04
Loss = 3.0083e-02, PNorm = 70.8922, GNorm = 0.5232, lr_0 = 1.1618e-04
Loss = 2.6917e-02, PNorm = 70.8936, GNorm = 0.3929, lr_0 = 1.1611e-04
Loss = 2.9303e-02, PNorm = 70.8950, GNorm = 0.5148, lr_0 = 1.1603e-04
Loss = 3.5520e-02, PNorm = 70.8963, GNorm = 0.9552, lr_0 = 1.1595e-04
Loss = 2.9630e-02, PNorm = 70.8989, GNorm = 0.4381, lr_0 = 1.1587e-04
Loss = 2.8334e-02, PNorm = 70.9017, GNorm = 0.6253, lr_0 = 1.1579e-04
Loss = 3.5108e-02, PNorm = 70.9044, GNorm = 0.5546, lr_0 = 1.1571e-04
Loss = 3.3976e-02, PNorm = 70.9063, GNorm = 0.8920, lr_0 = 1.1563e-04
Loss = 2.2412e-02, PNorm = 70.9082, GNorm = 0.3516, lr_0 = 1.1555e-04
Loss = 2.7373e-02, PNorm = 70.9091, GNorm = 0.8284, lr_0 = 1.1547e-04
Loss = 3.5148e-02, PNorm = 70.9094, GNorm = 0.4710, lr_0 = 1.1539e-04
Loss = 2.6143e-02, PNorm = 70.9114, GNorm = 0.3582, lr_0 = 1.1531e-04
Loss = 2.5656e-02, PNorm = 70.9130, GNorm = 0.4074, lr_0 = 1.1523e-04
Loss = 3.2454e-02, PNorm = 70.9154, GNorm = 0.3747, lr_0 = 1.1515e-04
Loss = 3.1676e-02, PNorm = 70.9178, GNorm = 0.5104, lr_0 = 1.1508e-04
Loss = 2.7356e-02, PNorm = 70.9191, GNorm = 0.4681, lr_0 = 1.1500e-04
Loss = 3.1715e-02, PNorm = 70.9194, GNorm = 0.4113, lr_0 = 1.1492e-04
Loss = 2.9786e-02, PNorm = 70.9203, GNorm = 0.5764, lr_0 = 1.1484e-04
Loss = 2.7264e-02, PNorm = 70.9225, GNorm = 0.3665, lr_0 = 1.1476e-04
Loss = 2.6826e-02, PNorm = 70.9231, GNorm = 0.4723, lr_0 = 1.1468e-04
Loss = 3.5896e-02, PNorm = 70.9241, GNorm = 0.7691, lr_0 = 1.1460e-04
Loss = 3.1988e-02, PNorm = 70.9256, GNorm = 0.3750, lr_0 = 1.1452e-04
Loss = 3.3306e-02, PNorm = 70.9263, GNorm = 0.6816, lr_0 = 1.1445e-04
Loss = 3.4643e-02, PNorm = 70.9275, GNorm = 0.6288, lr_0 = 1.1437e-04
Loss = 3.0158e-02, PNorm = 70.9288, GNorm = 0.4163, lr_0 = 1.1429e-04
Loss = 3.1148e-02, PNorm = 70.9299, GNorm = 0.3713, lr_0 = 1.1421e-04
Loss = 3.1345e-02, PNorm = 70.9312, GNorm = 0.4509, lr_0 = 1.1413e-04
Loss = 3.1565e-02, PNorm = 70.9329, GNorm = 0.3991, lr_0 = 1.1405e-04
Loss = 3.0029e-02, PNorm = 70.9352, GNorm = 0.5040, lr_0 = 1.1398e-04
Loss = 3.4106e-02, PNorm = 70.9384, GNorm = 0.4947, lr_0 = 1.1390e-04
Loss = 3.5740e-02, PNorm = 70.9403, GNorm = 0.4851, lr_0 = 1.1382e-04
Loss = 2.8634e-02, PNorm = 70.9420, GNorm = 0.3914, lr_0 = 1.1374e-04
Loss = 3.0868e-02, PNorm = 70.9437, GNorm = 0.3872, lr_0 = 1.1366e-04
Loss = 3.6325e-02, PNorm = 70.9456, GNorm = 0.3991, lr_0 = 1.1359e-04
Loss = 3.1038e-02, PNorm = 70.9476, GNorm = 0.6327, lr_0 = 1.1351e-04
Loss = 2.9553e-02, PNorm = 70.9492, GNorm = 0.4076, lr_0 = 1.1343e-04
Loss = 3.0166e-02, PNorm = 70.9514, GNorm = 0.4968, lr_0 = 1.1335e-04
Loss = 2.7769e-02, PNorm = 70.9528, GNorm = 0.5074, lr_0 = 1.1328e-04
Loss = 3.1721e-02, PNorm = 70.9545, GNorm = 0.5344, lr_0 = 1.1320e-04
Loss = 3.4647e-02, PNorm = 70.9559, GNorm = 0.5235, lr_0 = 1.1312e-04
Loss = 2.8947e-02, PNorm = 70.9572, GNorm = 0.5046, lr_0 = 1.1304e-04
Loss = 2.9810e-02, PNorm = 70.9584, GNorm = 0.5005, lr_0 = 1.1297e-04
Loss = 3.4208e-02, PNorm = 70.9607, GNorm = 0.5715, lr_0 = 1.1289e-04
Loss = 3.1565e-02, PNorm = 70.9627, GNorm = 0.3792, lr_0 = 1.1281e-04
Loss = 3.0616e-02, PNorm = 70.9638, GNorm = 0.3382, lr_0 = 1.1273e-04
Loss = 2.9144e-02, PNorm = 70.9650, GNorm = 0.5633, lr_0 = 1.1266e-04
Loss = 3.5070e-02, PNorm = 70.9668, GNorm = 0.4465, lr_0 = 1.1258e-04
Loss = 3.1944e-02, PNorm = 70.9676, GNorm = 0.3953, lr_0 = 1.1250e-04
Loss = 3.5096e-02, PNorm = 70.9678, GNorm = 0.4500, lr_0 = 1.1243e-04
Loss = 2.8973e-02, PNorm = 70.9683, GNorm = 0.3871, lr_0 = 1.1235e-04
Loss = 3.2559e-02, PNorm = 70.9692, GNorm = 0.5192, lr_0 = 1.1227e-04
Loss = 2.5563e-02, PNorm = 70.9700, GNorm = 0.4794, lr_0 = 1.1219e-04
Loss = 3.4056e-02, PNorm = 70.9714, GNorm = 0.6242, lr_0 = 1.1212e-04
Loss = 3.1837e-02, PNorm = 70.9729, GNorm = 0.3826, lr_0 = 1.1204e-04
Loss = 3.3481e-02, PNorm = 70.9735, GNorm = 0.3552, lr_0 = 1.1196e-04
Loss = 3.1715e-02, PNorm = 70.9743, GNorm = 0.4328, lr_0 = 1.1189e-04
Loss = 3.3043e-02, PNorm = 70.9751, GNorm = 0.6538, lr_0 = 1.1181e-04
Loss = 3.6961e-02, PNorm = 70.9761, GNorm = 0.4396, lr_0 = 1.1173e-04
Loss = 3.0030e-02, PNorm = 70.9776, GNorm = 0.3309, lr_0 = 1.1166e-04
Loss = 2.7031e-02, PNorm = 70.9786, GNorm = 0.5097, lr_0 = 1.1158e-04
Loss = 3.0810e-02, PNorm = 70.9804, GNorm = 0.4608, lr_0 = 1.1150e-04
Loss = 3.4225e-02, PNorm = 70.9824, GNorm = 0.4141, lr_0 = 1.1143e-04
Loss = 2.9743e-02, PNorm = 70.9846, GNorm = 0.4853, lr_0 = 1.1135e-04
Loss = 2.8696e-02, PNorm = 70.9856, GNorm = 0.5303, lr_0 = 1.1128e-04
Loss = 3.1221e-02, PNorm = 70.9874, GNorm = 0.4861, lr_0 = 1.1120e-04
Loss = 3.5394e-02, PNorm = 70.9894, GNorm = 0.5371, lr_0 = 1.1112e-04
Loss = 3.3872e-02, PNorm = 70.9907, GNorm = 0.5895, lr_0 = 1.1105e-04
Loss = 2.8195e-02, PNorm = 70.9929, GNorm = 0.4587, lr_0 = 1.1097e-04
Loss = 3.3507e-02, PNorm = 70.9939, GNorm = 0.3890, lr_0 = 1.1089e-04
Loss = 3.0601e-02, PNorm = 70.9954, GNorm = 0.3736, lr_0 = 1.1082e-04
Loss = 3.3790e-02, PNorm = 70.9965, GNorm = 0.5569, lr_0 = 1.1074e-04
Loss = 3.1522e-02, PNorm = 70.9982, GNorm = 0.7391, lr_0 = 1.1067e-04
Loss = 3.3842e-02, PNorm = 70.9999, GNorm = 0.4505, lr_0 = 1.1059e-04
Loss = 2.8292e-02, PNorm = 71.0015, GNorm = 0.3798, lr_0 = 1.1052e-04
Loss = 3.1001e-02, PNorm = 71.0024, GNorm = 0.4417, lr_0 = 1.1044e-04
Loss = 3.4564e-02, PNorm = 71.0038, GNorm = 0.7224, lr_0 = 1.1036e-04
Loss = 3.1609e-02, PNorm = 71.0061, GNorm = 0.4488, lr_0 = 1.1029e-04
Loss = 3.7284e-02, PNorm = 71.0083, GNorm = 0.6989, lr_0 = 1.1021e-04
Loss = 3.2039e-02, PNorm = 71.0094, GNorm = 0.4723, lr_0 = 1.1014e-04
Loss = 3.1463e-02, PNorm = 71.0098, GNorm = 0.4110, lr_0 = 1.1006e-04
Loss = 3.3793e-02, PNorm = 71.0102, GNorm = 0.5069, lr_0 = 1.0999e-04
Loss = 3.1074e-02, PNorm = 71.0110, GNorm = 0.5357, lr_0 = 1.0991e-04
Loss = 3.3885e-02, PNorm = 71.0123, GNorm = 0.5912, lr_0 = 1.0984e-04
Loss = 3.2426e-02, PNorm = 71.0142, GNorm = 0.3820, lr_0 = 1.0976e-04
Loss = 3.1488e-02, PNorm = 71.0163, GNorm = 0.3589, lr_0 = 1.0969e-04
Loss = 3.4677e-02, PNorm = 71.0171, GNorm = 0.3901, lr_0 = 1.0961e-04
Loss = 3.5649e-02, PNorm = 71.0178, GNorm = 0.6506, lr_0 = 1.0954e-04
Loss = 2.7364e-02, PNorm = 71.0193, GNorm = 0.4919, lr_0 = 1.0946e-04
Loss = 3.3996e-02, PNorm = 71.0217, GNorm = 0.4717, lr_0 = 1.0939e-04
Loss = 2.9169e-02, PNorm = 71.0240, GNorm = 0.3885, lr_0 = 1.0931e-04
Loss = 2.9993e-02, PNorm = 71.0248, GNorm = 0.3932, lr_0 = 1.0924e-04
Loss = 3.0651e-02, PNorm = 71.0246, GNorm = 0.4701, lr_0 = 1.0916e-04
Loss = 3.1179e-02, PNorm = 71.0254, GNorm = 0.4520, lr_0 = 1.0909e-04
Loss = 3.2965e-02, PNorm = 71.0275, GNorm = 0.4666, lr_0 = 1.0901e-04
Loss = 3.3442e-02, PNorm = 71.0291, GNorm = 0.7166, lr_0 = 1.0894e-04
Loss = 3.2164e-02, PNorm = 71.0303, GNorm = 0.4666, lr_0 = 1.0886e-04
Loss = 3.4243e-02, PNorm = 71.0317, GNorm = 0.4751, lr_0 = 1.0879e-04
Loss = 3.2081e-02, PNorm = 71.0322, GNorm = 0.5858, lr_0 = 1.0871e-04
Loss = 3.6586e-02, PNorm = 71.0336, GNorm = 0.4268, lr_0 = 1.0864e-04
Loss = 3.5210e-02, PNorm = 71.0347, GNorm = 0.7126, lr_0 = 1.0856e-04
Validation mae = 0.389798
Epoch 29
Loss = 2.6219e-02, PNorm = 71.0368, GNorm = 0.4941, lr_0 = 1.0849e-04
Loss = 2.7088e-02, PNorm = 71.0386, GNorm = 0.4554, lr_0 = 1.0841e-04
Loss = 2.6797e-02, PNorm = 71.0407, GNorm = 0.2913, lr_0 = 1.0834e-04
Loss = 2.9029e-02, PNorm = 71.0433, GNorm = 0.6106, lr_0 = 1.0827e-04
Loss = 2.7082e-02, PNorm = 71.0458, GNorm = 0.5859, lr_0 = 1.0819e-04
Loss = 2.7306e-02, PNorm = 71.0479, GNorm = 0.6243, lr_0 = 1.0812e-04
Loss = 2.5752e-02, PNorm = 71.0493, GNorm = 0.6814, lr_0 = 1.0804e-04
Loss = 2.6198e-02, PNorm = 71.0512, GNorm = 0.4047, lr_0 = 1.0797e-04
Loss = 2.8059e-02, PNorm = 71.0528, GNorm = 0.3934, lr_0 = 1.0790e-04
Loss = 2.8260e-02, PNorm = 71.0541, GNorm = 0.5124, lr_0 = 1.0782e-04
Loss = 2.8233e-02, PNorm = 71.0551, GNorm = 0.3295, lr_0 = 1.0775e-04
Loss = 3.2150e-02, PNorm = 71.0563, GNorm = 0.7484, lr_0 = 1.0767e-04
Loss = 2.5757e-02, PNorm = 71.0579, GNorm = 0.4314, lr_0 = 1.0760e-04
Loss = 2.7388e-02, PNorm = 71.0587, GNorm = 0.3002, lr_0 = 1.0753e-04
Loss = 3.3596e-02, PNorm = 71.0597, GNorm = 0.4466, lr_0 = 1.0745e-04
Loss = 3.2724e-02, PNorm = 71.0612, GNorm = 0.4440, lr_0 = 1.0738e-04
Loss = 2.9926e-02, PNorm = 71.0628, GNorm = 0.4609, lr_0 = 1.0731e-04
Loss = 2.7254e-02, PNorm = 71.0639, GNorm = 0.5297, lr_0 = 1.0723e-04
Loss = 2.3310e-02, PNorm = 71.0649, GNorm = 0.4943, lr_0 = 1.0716e-04
Loss = 2.6891e-02, PNorm = 71.0659, GNorm = 0.4648, lr_0 = 1.0709e-04
Loss = 2.5481e-02, PNorm = 71.0675, GNorm = 0.4682, lr_0 = 1.0701e-04
Loss = 2.7924e-02, PNorm = 71.0685, GNorm = 0.4770, lr_0 = 1.0694e-04
Loss = 3.1475e-02, PNorm = 71.0694, GNorm = 0.6540, lr_0 = 1.0687e-04
Loss = 2.8015e-02, PNorm = 71.0706, GNorm = 0.4541, lr_0 = 1.0679e-04
Loss = 2.7383e-02, PNorm = 71.0715, GNorm = 0.3777, lr_0 = 1.0672e-04
Loss = 3.1495e-02, PNorm = 71.0737, GNorm = 0.4342, lr_0 = 1.0665e-04
Loss = 3.4029e-02, PNorm = 71.0764, GNorm = 0.5634, lr_0 = 1.0657e-04
Loss = 2.5437e-02, PNorm = 71.0791, GNorm = 0.5137, lr_0 = 1.0650e-04
Loss = 2.7541e-02, PNorm = 71.0812, GNorm = 0.5607, lr_0 = 1.0643e-04
Loss = 2.5326e-02, PNorm = 71.0829, GNorm = 0.4605, lr_0 = 1.0635e-04
Loss = 3.5595e-02, PNorm = 71.0845, GNorm = 0.4290, lr_0 = 1.0628e-04
Loss = 3.0430e-02, PNorm = 71.0864, GNorm = 0.4967, lr_0 = 1.0621e-04
Loss = 2.6910e-02, PNorm = 71.0874, GNorm = 0.4079, lr_0 = 1.0614e-04
Loss = 3.1285e-02, PNorm = 71.0891, GNorm = 0.4444, lr_0 = 1.0606e-04
Loss = 2.9015e-02, PNorm = 71.0909, GNorm = 0.7881, lr_0 = 1.0599e-04
Loss = 2.4516e-02, PNorm = 71.0923, GNorm = 0.4793, lr_0 = 1.0592e-04
Loss = 3.4655e-02, PNorm = 71.0941, GNorm = 0.7096, lr_0 = 1.0585e-04
Loss = 3.1368e-02, PNorm = 71.0956, GNorm = 0.4596, lr_0 = 1.0577e-04
Loss = 2.7547e-02, PNorm = 71.0965, GNorm = 0.3619, lr_0 = 1.0570e-04
Loss = 2.9562e-02, PNorm = 71.0972, GNorm = 0.3811, lr_0 = 1.0563e-04
Loss = 2.7713e-02, PNorm = 71.0986, GNorm = 0.3915, lr_0 = 1.0556e-04
Loss = 3.1041e-02, PNorm = 71.1000, GNorm = 0.4955, lr_0 = 1.0548e-04
Loss = 3.0183e-02, PNorm = 71.1015, GNorm = 0.4790, lr_0 = 1.0541e-04
Loss = 2.9319e-02, PNorm = 71.1031, GNorm = 0.4570, lr_0 = 1.0534e-04
Loss = 2.9471e-02, PNorm = 71.1048, GNorm = 0.5035, lr_0 = 1.0527e-04
Loss = 2.8561e-02, PNorm = 71.1064, GNorm = 0.4356, lr_0 = 1.0519e-04
Loss = 3.7757e-02, PNorm = 71.1073, GNorm = 0.5823, lr_0 = 1.0512e-04
Loss = 3.0186e-02, PNorm = 71.1082, GNorm = 0.5523, lr_0 = 1.0505e-04
Loss = 3.3173e-02, PNorm = 71.1095, GNorm = 0.5848, lr_0 = 1.0498e-04
Loss = 2.7240e-02, PNorm = 71.1104, GNorm = 0.3624, lr_0 = 1.0491e-04
Loss = 3.3307e-02, PNorm = 71.1118, GNorm = 0.5802, lr_0 = 1.0483e-04
Loss = 2.6991e-02, PNorm = 71.1127, GNorm = 0.5665, lr_0 = 1.0476e-04
Loss = 2.6102e-02, PNorm = 71.1137, GNorm = 0.4133, lr_0 = 1.0469e-04
Loss = 2.5889e-02, PNorm = 71.1145, GNorm = 0.4014, lr_0 = 1.0462e-04
Loss = 2.9692e-02, PNorm = 71.1161, GNorm = 0.4465, lr_0 = 1.0455e-04
Loss = 3.0689e-02, PNorm = 71.1170, GNorm = 0.4951, lr_0 = 1.0448e-04
Loss = 2.6488e-02, PNorm = 71.1165, GNorm = 0.4998, lr_0 = 1.0440e-04
Loss = 3.1517e-02, PNorm = 71.1168, GNorm = 0.5368, lr_0 = 1.0433e-04
Loss = 3.2559e-02, PNorm = 71.1183, GNorm = 0.4860, lr_0 = 1.0426e-04
Loss = 3.1412e-02, PNorm = 71.1205, GNorm = 0.5397, lr_0 = 1.0419e-04
Loss = 2.6877e-02, PNorm = 71.1218, GNorm = 0.4337, lr_0 = 1.0412e-04
Loss = 3.1544e-02, PNorm = 71.1222, GNorm = 0.4774, lr_0 = 1.0405e-04
Loss = 3.4182e-02, PNorm = 71.1239, GNorm = 0.5867, lr_0 = 1.0398e-04
Loss = 3.1662e-02, PNorm = 71.1256, GNorm = 0.4481, lr_0 = 1.0391e-04
Loss = 2.9697e-02, PNorm = 71.1269, GNorm = 0.5401, lr_0 = 1.0383e-04
Loss = 2.8070e-02, PNorm = 71.1280, GNorm = 0.4994, lr_0 = 1.0376e-04
Loss = 3.0875e-02, PNorm = 71.1294, GNorm = 0.4314, lr_0 = 1.0369e-04
Loss = 2.9753e-02, PNorm = 71.1307, GNorm = 0.4491, lr_0 = 1.0362e-04
Loss = 2.9863e-02, PNorm = 71.1322, GNorm = 0.4427, lr_0 = 1.0355e-04
Loss = 2.7387e-02, PNorm = 71.1332, GNorm = 0.5470, lr_0 = 1.0348e-04
Loss = 3.0733e-02, PNorm = 71.1342, GNorm = 0.5274, lr_0 = 1.0341e-04
Loss = 3.7043e-02, PNorm = 71.1361, GNorm = 0.7857, lr_0 = 1.0334e-04
Loss = 2.7509e-02, PNorm = 71.1375, GNorm = 0.4697, lr_0 = 1.0327e-04
Loss = 3.2344e-02, PNorm = 71.1395, GNorm = 0.3922, lr_0 = 1.0320e-04
Loss = 3.0843e-02, PNorm = 71.1410, GNorm = 0.5826, lr_0 = 1.0312e-04
Loss = 2.3260e-02, PNorm = 71.1418, GNorm = 0.4600, lr_0 = 1.0305e-04
Loss = 3.0211e-02, PNorm = 71.1426, GNorm = 0.3058, lr_0 = 1.0298e-04
Loss = 2.7501e-02, PNorm = 71.1445, GNorm = 0.6192, lr_0 = 1.0291e-04
Loss = 3.1539e-02, PNorm = 71.1459, GNorm = 0.3819, lr_0 = 1.0284e-04
Loss = 2.6880e-02, PNorm = 71.1465, GNorm = 0.4676, lr_0 = 1.0277e-04
Loss = 2.8554e-02, PNorm = 71.1483, GNorm = 0.4848, lr_0 = 1.0270e-04
Loss = 3.5892e-02, PNorm = 71.1502, GNorm = 0.5710, lr_0 = 1.0263e-04
Loss = 2.7862e-02, PNorm = 71.1517, GNorm = 0.5965, lr_0 = 1.0256e-04
Loss = 3.1816e-02, PNorm = 71.1534, GNorm = 0.8460, lr_0 = 1.0249e-04
Loss = 3.0973e-02, PNorm = 71.1545, GNorm = 0.4484, lr_0 = 1.0242e-04
Loss = 3.3727e-02, PNorm = 71.1553, GNorm = 0.6091, lr_0 = 1.0235e-04
Loss = 2.9960e-02, PNorm = 71.1560, GNorm = 0.8051, lr_0 = 1.0228e-04
Loss = 3.3464e-02, PNorm = 71.1582, GNorm = 0.7479, lr_0 = 1.0221e-04
Loss = 2.7333e-02, PNorm = 71.1607, GNorm = 0.4201, lr_0 = 1.0214e-04
Loss = 2.8516e-02, PNorm = 71.1625, GNorm = 0.7155, lr_0 = 1.0207e-04
Loss = 2.6779e-02, PNorm = 71.1641, GNorm = 0.5273, lr_0 = 1.0200e-04
Loss = 3.5370e-02, PNorm = 71.1650, GNorm = 0.4130, lr_0 = 1.0193e-04
Loss = 2.9656e-02, PNorm = 71.1662, GNorm = 0.4262, lr_0 = 1.0186e-04
Loss = 2.5981e-02, PNorm = 71.1667, GNorm = 0.3458, lr_0 = 1.0179e-04
Loss = 2.9297e-02, PNorm = 71.1673, GNorm = 0.4437, lr_0 = 1.0172e-04
Loss = 3.7050e-02, PNorm = 71.1677, GNorm = 0.4415, lr_0 = 1.0165e-04
Loss = 3.8745e-02, PNorm = 71.1681, GNorm = 0.5908, lr_0 = 1.0158e-04
Loss = 2.8427e-02, PNorm = 71.1693, GNorm = 0.4938, lr_0 = 1.0151e-04
Loss = 3.2594e-02, PNorm = 71.1705, GNorm = 0.4256, lr_0 = 1.0144e-04
Loss = 2.9465e-02, PNorm = 71.1729, GNorm = 0.5065, lr_0 = 1.0137e-04
Loss = 2.9826e-02, PNorm = 71.1746, GNorm = 0.4532, lr_0 = 1.0130e-04
Loss = 3.1675e-02, PNorm = 71.1757, GNorm = 0.4920, lr_0 = 1.0123e-04
Loss = 2.8914e-02, PNorm = 71.1767, GNorm = 0.4270, lr_0 = 1.0116e-04
Loss = 3.0489e-02, PNorm = 71.1779, GNorm = 0.4586, lr_0 = 1.0110e-04
Loss = 3.3058e-02, PNorm = 71.1794, GNorm = 0.7283, lr_0 = 1.0103e-04
Loss = 3.1190e-02, PNorm = 71.1799, GNorm = 0.6997, lr_0 = 1.0096e-04
Loss = 2.6815e-02, PNorm = 71.1812, GNorm = 0.3887, lr_0 = 1.0089e-04
Loss = 3.0024e-02, PNorm = 71.1832, GNorm = 0.3839, lr_0 = 1.0082e-04
Loss = 3.3926e-02, PNorm = 71.1853, GNorm = 0.5998, lr_0 = 1.0075e-04
Loss = 2.4282e-02, PNorm = 71.1865, GNorm = 0.6338, lr_0 = 1.0068e-04
Loss = 2.9414e-02, PNorm = 71.1872, GNorm = 0.5139, lr_0 = 1.0061e-04
Loss = 3.0473e-02, PNorm = 71.1877, GNorm = 0.4654, lr_0 = 1.0054e-04
Loss = 2.6586e-02, PNorm = 71.1883, GNorm = 0.4271, lr_0 = 1.0047e-04
Loss = 2.8625e-02, PNorm = 71.1892, GNorm = 0.3731, lr_0 = 1.0041e-04
Loss = 3.1306e-02, PNorm = 71.1900, GNorm = 0.4841, lr_0 = 1.0034e-04
Loss = 2.9172e-02, PNorm = 71.1907, GNorm = 0.4349, lr_0 = 1.0027e-04
Loss = 3.0352e-02, PNorm = 71.1926, GNorm = 0.4561, lr_0 = 1.0020e-04
Loss = 2.7230e-02, PNorm = 71.1944, GNorm = 0.4618, lr_0 = 1.0013e-04
Loss = 3.1134e-02, PNorm = 71.1960, GNorm = 0.6096, lr_0 = 1.0006e-04
Loss = 2.9431e-02, PNorm = 71.1973, GNorm = 0.4654, lr_0 = 1.0000e-04
Validation mae = 0.392308
Model 0 best validation mae = 0.384220 on epoch 19
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.380340
Ensemble test mae = 0.380340
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=400, bias=False)
        (W_h): Linear(in_features=400, out_features=400, bias=False)
        (W_o): Linear(in_features=533, out_features=400, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=400, out_features=400, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=400, out_features=1, bias=True)
  )
)
Number of parameters = 593,201
Moving model to cuda
Epoch 0
Loss = 1.0084e+00, PNorm = 38.3711, GNorm = 1.8157, lr_0 = 1.0413e-04
Loss = 8.3623e-01, PNorm = 38.3740, GNorm = 3.4547, lr_0 = 1.0788e-04
Loss = 7.2997e-01, PNorm = 38.3767, GNorm = 6.3926, lr_0 = 1.1163e-04
Loss = 7.4573e-01, PNorm = 38.3794, GNorm = 5.5899, lr_0 = 1.1537e-04
Loss = 6.5792e-01, PNorm = 38.3829, GNorm = 4.7561, lr_0 = 1.1913e-04
Loss = 6.9897e-01, PNorm = 38.3870, GNorm = 6.3951, lr_0 = 1.2287e-04
Loss = 6.5690e-01, PNorm = 38.3918, GNorm = 2.3778, lr_0 = 1.2663e-04
Loss = 5.9452e-01, PNorm = 38.3980, GNorm = 4.0555, lr_0 = 1.3038e-04
Loss = 6.2328e-01, PNorm = 38.4037, GNorm = 1.9536, lr_0 = 1.3413e-04
Loss = 5.8192e-01, PNorm = 38.4095, GNorm = 9.7773, lr_0 = 1.3788e-04
Loss = 6.0743e-01, PNorm = 38.4158, GNorm = 14.2725, lr_0 = 1.4163e-04
Loss = 4.9781e-01, PNorm = 38.4226, GNorm = 1.4458, lr_0 = 1.4537e-04
Loss = 4.9863e-01, PNorm = 38.4283, GNorm = 10.8654, lr_0 = 1.4913e-04
Loss = 4.2849e-01, PNorm = 38.4337, GNorm = 22.1601, lr_0 = 1.5288e-04
Loss = 5.1456e-01, PNorm = 38.4389, GNorm = 3.9212, lr_0 = 1.5662e-04
Loss = 4.3200e-01, PNorm = 38.4439, GNorm = 16.4068, lr_0 = 1.6038e-04
Loss = 4.2967e-01, PNorm = 38.4488, GNorm = 11.4773, lr_0 = 1.6412e-04
Loss = 4.3100e-01, PNorm = 38.4538, GNorm = 2.7919, lr_0 = 1.6788e-04
Loss = 4.5995e-01, PNorm = 38.4584, GNorm = 8.8972, lr_0 = 1.7163e-04
Loss = 4.5934e-01, PNorm = 38.4628, GNorm = 4.4785, lr_0 = 1.7538e-04
Loss = 4.3222e-01, PNorm = 38.4682, GNorm = 2.5779, lr_0 = 1.7913e-04
Loss = 4.1651e-01, PNorm = 38.4744, GNorm = 12.8285, lr_0 = 1.8288e-04
Loss = 3.8626e-01, PNorm = 38.4788, GNorm = 2.2640, lr_0 = 1.8662e-04
Loss = 3.9222e-01, PNorm = 38.4846, GNorm = 16.2608, lr_0 = 1.9038e-04
Loss = 4.2073e-01, PNorm = 38.4887, GNorm = 24.6630, lr_0 = 1.9413e-04
Loss = 4.3537e-01, PNorm = 38.4918, GNorm = 5.8847, lr_0 = 1.9788e-04
Loss = 4.1194e-01, PNorm = 38.4958, GNorm = 2.9506, lr_0 = 2.0163e-04
Loss = 3.7461e-01, PNorm = 38.5035, GNorm = 13.2911, lr_0 = 2.0537e-04
Loss = 3.9758e-01, PNorm = 38.5107, GNorm = 1.2550, lr_0 = 2.0913e-04
Loss = 3.5153e-01, PNorm = 38.5186, GNorm = 3.6180, lr_0 = 2.1288e-04
Loss = 3.7169e-01, PNorm = 38.5259, GNorm = 1.7811, lr_0 = 2.1663e-04
Loss = 3.6210e-01, PNorm = 38.5339, GNorm = 8.7375, lr_0 = 2.2038e-04
Loss = 3.7747e-01, PNorm = 38.5384, GNorm = 1.9558, lr_0 = 2.2412e-04
Loss = 3.4929e-01, PNorm = 38.5416, GNorm = 27.8279, lr_0 = 2.2787e-04
Loss = 4.1664e-01, PNorm = 38.5463, GNorm = 20.1705, lr_0 = 2.3163e-04
Loss = 4.7489e-01, PNorm = 38.5491, GNorm = 14.3545, lr_0 = 2.3538e-04
Loss = 4.1181e-01, PNorm = 38.5544, GNorm = 10.1416, lr_0 = 2.3913e-04
Loss = 4.1506e-01, PNorm = 38.5617, GNorm = 6.1508, lr_0 = 2.4288e-04
Loss = 3.5293e-01, PNorm = 38.5698, GNorm = 2.9053, lr_0 = 2.4662e-04
Loss = 3.5175e-01, PNorm = 38.5779, GNorm = 6.8388, lr_0 = 2.5038e-04
Loss = 3.8422e-01, PNorm = 38.5856, GNorm = 4.2517, lr_0 = 2.5413e-04
Loss = 3.7602e-01, PNorm = 38.5907, GNorm = 4.6121, lr_0 = 2.5788e-04
Loss = 3.0736e-01, PNorm = 38.5953, GNorm = 6.6383, lr_0 = 2.6163e-04
Loss = 3.0928e-01, PNorm = 38.6015, GNorm = 9.9640, lr_0 = 2.6537e-04
Loss = 3.4338e-01, PNorm = 38.6104, GNorm = 7.6694, lr_0 = 2.6912e-04
Loss = 2.9715e-01, PNorm = 38.6209, GNorm = 2.9582, lr_0 = 2.7288e-04
Loss = 3.7485e-01, PNorm = 38.6280, GNorm = 9.3643, lr_0 = 2.7663e-04
Loss = 3.4814e-01, PNorm = 38.6325, GNorm = 2.5709, lr_0 = 2.8038e-04
Loss = 3.5058e-01, PNorm = 38.6404, GNorm = 11.2291, lr_0 = 2.8413e-04
Loss = 3.2079e-01, PNorm = 38.6480, GNorm = 3.7225, lr_0 = 2.8787e-04
Loss = 3.6957e-01, PNorm = 38.6518, GNorm = 14.3771, lr_0 = 2.9163e-04
Loss = 3.4330e-01, PNorm = 38.6570, GNorm = 7.5433, lr_0 = 2.9538e-04
Loss = 3.1678e-01, PNorm = 38.6628, GNorm = 2.2768, lr_0 = 2.9913e-04
Loss = 3.2524e-01, PNorm = 38.6721, GNorm = 7.4893, lr_0 = 3.0288e-04
Loss = 3.3664e-01, PNorm = 38.6844, GNorm = 3.0823, lr_0 = 3.0662e-04
Loss = 2.9302e-01, PNorm = 38.6919, GNorm = 22.8226, lr_0 = 3.1037e-04
Loss = 3.4149e-01, PNorm = 38.6949, GNorm = 2.1268, lr_0 = 3.1413e-04
Loss = 3.2505e-01, PNorm = 38.7036, GNorm = 2.7482, lr_0 = 3.1788e-04
Loss = 3.0475e-01, PNorm = 38.7115, GNorm = 2.6745, lr_0 = 3.2163e-04
Loss = 2.9292e-01, PNorm = 38.7180, GNorm = 2.9754, lr_0 = 3.2538e-04
Loss = 2.8172e-01, PNorm = 38.7260, GNorm = 2.3462, lr_0 = 3.2912e-04
Loss = 3.8301e-01, PNorm = 38.7289, GNorm = 18.1477, lr_0 = 3.3288e-04
Loss = 3.2823e-01, PNorm = 38.7318, GNorm = 7.5498, lr_0 = 3.3663e-04
Loss = 3.7686e-01, PNorm = 38.7392, GNorm = 22.5960, lr_0 = 3.4038e-04
Loss = 3.7294e-01, PNorm = 38.7473, GNorm = 12.4330, lr_0 = 3.4413e-04
Loss = 3.6875e-01, PNorm = 38.7576, GNorm = 5.2493, lr_0 = 3.4787e-04
Loss = 3.1555e-01, PNorm = 38.7689, GNorm = 7.9920, lr_0 = 3.5162e-04
Loss = 2.9841e-01, PNorm = 38.7804, GNorm = 12.3086, lr_0 = 3.5538e-04
Loss = 3.4876e-01, PNorm = 38.7897, GNorm = 7.0136, lr_0 = 3.5913e-04
Loss = 3.2931e-01, PNorm = 38.7978, GNorm = 3.8258, lr_0 = 3.6288e-04
Loss = 4.0110e-01, PNorm = 38.8052, GNorm = 1.7373, lr_0 = 3.6662e-04
Loss = 3.6774e-01, PNorm = 38.8141, GNorm = 8.0438, lr_0 = 3.7037e-04
Loss = 3.3127e-01, PNorm = 38.8244, GNorm = 6.9833, lr_0 = 3.7413e-04
Loss = 3.4967e-01, PNorm = 38.8349, GNorm = 10.1938, lr_0 = 3.7788e-04
Loss = 3.5538e-01, PNorm = 38.8471, GNorm = 1.8694, lr_0 = 3.8163e-04
Loss = 2.8458e-01, PNorm = 38.8592, GNorm = 1.5103, lr_0 = 3.8537e-04
Loss = 2.9965e-01, PNorm = 38.8718, GNorm = 10.0572, lr_0 = 3.8912e-04
Loss = 2.9808e-01, PNorm = 38.8804, GNorm = 1.4428, lr_0 = 3.9287e-04
Loss = 3.2121e-01, PNorm = 38.8884, GNorm = 1.6182, lr_0 = 3.9663e-04
Loss = 3.1861e-01, PNorm = 38.8943, GNorm = 2.3211, lr_0 = 4.0038e-04
Loss = 3.1797e-01, PNorm = 38.9022, GNorm = 1.5675, lr_0 = 4.0413e-04
Loss = 2.9447e-01, PNorm = 38.9143, GNorm = 7.5053, lr_0 = 4.0787e-04
Loss = 2.5678e-01, PNorm = 38.9246, GNorm = 4.4571, lr_0 = 4.1162e-04
Loss = 2.7292e-01, PNorm = 38.9320, GNorm = 3.7041, lr_0 = 4.1537e-04
Loss = 2.9362e-01, PNorm = 38.9417, GNorm = 1.5425, lr_0 = 4.1913e-04
Loss = 2.5765e-01, PNorm = 38.9535, GNorm = 1.9645, lr_0 = 4.2288e-04
Loss = 2.6603e-01, PNorm = 38.9637, GNorm = 13.0454, lr_0 = 4.2662e-04
Loss = 3.1664e-01, PNorm = 38.9666, GNorm = 3.3533, lr_0 = 4.3037e-04
Loss = 3.3890e-01, PNorm = 38.9719, GNorm = 1.0993, lr_0 = 4.3412e-04
Loss = 3.1558e-01, PNorm = 38.9805, GNorm = 9.5486, lr_0 = 4.3788e-04
Loss = 3.7208e-01, PNorm = 38.9950, GNorm = 3.6930, lr_0 = 4.4163e-04
Loss = 3.2898e-01, PNorm = 39.0122, GNorm = 2.7664, lr_0 = 4.4538e-04
Loss = 3.3425e-01, PNorm = 39.0273, GNorm = 7.0741, lr_0 = 4.4912e-04
Loss = 3.0366e-01, PNorm = 39.0416, GNorm = 8.1995, lr_0 = 4.5287e-04
Loss = 3.4023e-01, PNorm = 39.0572, GNorm = 7.7747, lr_0 = 4.5662e-04
Loss = 3.1778e-01, PNorm = 39.0703, GNorm = 11.7788, lr_0 = 4.6038e-04
Loss = 2.7724e-01, PNorm = 39.0827, GNorm = 1.8042, lr_0 = 4.6413e-04
Loss = 3.5227e-01, PNorm = 39.0922, GNorm = 14.4398, lr_0 = 4.6787e-04
Loss = 3.0685e-01, PNorm = 39.1035, GNorm = 8.9735, lr_0 = 4.7162e-04
Loss = 3.0130e-01, PNorm = 39.1172, GNorm = 5.3455, lr_0 = 4.7537e-04
Loss = 3.6158e-01, PNorm = 39.1367, GNorm = 7.2662, lr_0 = 4.7913e-04
Loss = 2.9487e-01, PNorm = 39.1490, GNorm = 16.0637, lr_0 = 4.8288e-04
Loss = 2.9668e-01, PNorm = 39.1586, GNorm = 6.8084, lr_0 = 4.8663e-04
Loss = 2.8878e-01, PNorm = 39.1721, GNorm = 4.4277, lr_0 = 4.9038e-04
Loss = 2.7529e-01, PNorm = 39.1840, GNorm = 5.6800, lr_0 = 4.9412e-04
Loss = 3.1101e-01, PNorm = 39.1895, GNorm = 10.9336, lr_0 = 4.9788e-04
Loss = 2.8361e-01, PNorm = 39.1926, GNorm = 2.5026, lr_0 = 5.0163e-04
Loss = 3.0482e-01, PNorm = 39.2060, GNorm = 6.1471, lr_0 = 5.0538e-04
Loss = 2.7074e-01, PNorm = 39.2161, GNorm = 7.5365, lr_0 = 5.0913e-04
Loss = 2.7257e-01, PNorm = 39.2248, GNorm = 2.7915, lr_0 = 5.1287e-04
Loss = 2.4561e-01, PNorm = 39.2328, GNorm = 7.8667, lr_0 = 5.1663e-04
Loss = 2.7873e-01, PNorm = 39.2422, GNorm = 10.7510, lr_0 = 5.2038e-04
Loss = 3.1340e-01, PNorm = 39.2515, GNorm = 1.9012, lr_0 = 5.2413e-04
Loss = 3.0170e-01, PNorm = 39.2656, GNorm = 1.8860, lr_0 = 5.2788e-04
Loss = 3.1053e-01, PNorm = 39.2819, GNorm = 9.1740, lr_0 = 5.3162e-04
Loss = 3.0879e-01, PNorm = 39.2986, GNorm = 4.5664, lr_0 = 5.3538e-04
Loss = 2.4932e-01, PNorm = 39.3160, GNorm = 3.2251, lr_0 = 5.3912e-04
Loss = 2.6843e-01, PNorm = 39.3376, GNorm = 8.4780, lr_0 = 5.4288e-04
Loss = 2.5517e-01, PNorm = 39.3497, GNorm = 2.0578, lr_0 = 5.4663e-04
Loss = 2.7471e-01, PNorm = 39.3634, GNorm = 6.1890, lr_0 = 5.5038e-04
Validation mae = 0.577535
Epoch 1
Loss = 2.4382e-01, PNorm = 39.3793, GNorm = 1.3524, lr_0 = 5.5413e-04
Loss = 2.6163e-01, PNorm = 39.3927, GNorm = 5.7467, lr_0 = 5.5787e-04
Loss = 2.9294e-01, PNorm = 39.4097, GNorm = 12.3297, lr_0 = 5.6163e-04
Loss = 2.8860e-01, PNorm = 39.4240, GNorm = 12.8336, lr_0 = 5.6538e-04
Loss = 2.8396e-01, PNorm = 39.4393, GNorm = 1.4100, lr_0 = 5.6913e-04
Loss = 2.7013e-01, PNorm = 39.4563, GNorm = 11.2244, lr_0 = 5.7288e-04
Loss = 2.4008e-01, PNorm = 39.4713, GNorm = 6.0522, lr_0 = 5.7662e-04
Loss = 2.5202e-01, PNorm = 39.4876, GNorm = 6.5701, lr_0 = 5.8038e-04
Loss = 2.4914e-01, PNorm = 39.5042, GNorm = 5.6806, lr_0 = 5.8413e-04
Loss = 2.7335e-01, PNorm = 39.5185, GNorm = 5.6569, lr_0 = 5.8788e-04
Loss = 2.4500e-01, PNorm = 39.5273, GNorm = 4.5091, lr_0 = 5.9163e-04
Loss = 2.2496e-01, PNorm = 39.5409, GNorm = 1.9092, lr_0 = 5.9538e-04
Loss = 2.4207e-01, PNorm = 39.5599, GNorm = 6.8544, lr_0 = 5.9913e-04
Loss = 2.1882e-01, PNorm = 39.5715, GNorm = 2.7817, lr_0 = 6.0288e-04
Loss = 2.6633e-01, PNorm = 39.5823, GNorm = 4.1233, lr_0 = 6.0663e-04
Loss = 2.1585e-01, PNorm = 39.5967, GNorm = 1.5551, lr_0 = 6.1038e-04
Loss = 2.2347e-01, PNorm = 39.6103, GNorm = 4.7664, lr_0 = 6.1413e-04
Loss = 2.2222e-01, PNorm = 39.6241, GNorm = 1.4121, lr_0 = 6.1788e-04
Loss = 2.6954e-01, PNorm = 39.6331, GNorm = 3.6352, lr_0 = 6.2163e-04
Loss = 2.3307e-01, PNorm = 39.6452, GNorm = 2.3847, lr_0 = 6.2538e-04
Loss = 2.5465e-01, PNorm = 39.6565, GNorm = 8.5508, lr_0 = 6.2913e-04
Loss = 2.5009e-01, PNorm = 39.6685, GNorm = 10.0497, lr_0 = 6.3288e-04
Loss = 2.7850e-01, PNorm = 39.6816, GNorm = 4.9392, lr_0 = 6.3663e-04
Loss = 2.6014e-01, PNorm = 39.7012, GNorm = 1.2957, lr_0 = 6.4038e-04
Loss = 3.1902e-01, PNorm = 39.7193, GNorm = 5.6405, lr_0 = 6.4413e-04
Loss = 2.6762e-01, PNorm = 39.7442, GNorm = 1.4301, lr_0 = 6.4788e-04
Loss = 2.7794e-01, PNorm = 39.7602, GNorm = 5.0612, lr_0 = 6.5163e-04
Loss = 2.5399e-01, PNorm = 39.7830, GNorm = 3.6613, lr_0 = 6.5538e-04
Loss = 2.6293e-01, PNorm = 39.8006, GNorm = 7.9518, lr_0 = 6.5913e-04
Loss = 2.8918e-01, PNorm = 39.8158, GNorm = 0.9177, lr_0 = 6.6288e-04
Loss = 2.5735e-01, PNorm = 39.8334, GNorm = 3.9009, lr_0 = 6.6663e-04
Loss = 2.6030e-01, PNorm = 39.8470, GNorm = 4.4541, lr_0 = 6.7038e-04
Loss = 2.2772e-01, PNorm = 39.8586, GNorm = 2.8338, lr_0 = 6.7413e-04
Loss = 2.6855e-01, PNorm = 39.8774, GNorm = 3.9498, lr_0 = 6.7788e-04
Loss = 2.4350e-01, PNorm = 39.9002, GNorm = 2.0119, lr_0 = 6.8163e-04
Loss = 2.8702e-01, PNorm = 39.9161, GNorm = 1.6741, lr_0 = 6.8538e-04
Loss = 3.6139e-01, PNorm = 39.9306, GNorm = 1.5244, lr_0 = 6.8913e-04
Loss = 2.9808e-01, PNorm = 39.9561, GNorm = 5.9876, lr_0 = 6.9288e-04
Loss = 2.9733e-01, PNorm = 39.9811, GNorm = 1.7300, lr_0 = 6.9663e-04
Loss = 2.3918e-01, PNorm = 40.0035, GNorm = 1.4249, lr_0 = 7.0038e-04
Loss = 2.1490e-01, PNorm = 40.0229, GNorm = 2.3077, lr_0 = 7.0413e-04
Loss = 2.3358e-01, PNorm = 40.0358, GNorm = 1.4953, lr_0 = 7.0788e-04
Loss = 2.7289e-01, PNorm = 40.0549, GNorm = 5.1533, lr_0 = 7.1163e-04
Loss = 2.8235e-01, PNorm = 40.0745, GNorm = 2.9212, lr_0 = 7.1538e-04
Loss = 3.0043e-01, PNorm = 40.0989, GNorm = 7.5912, lr_0 = 7.1913e-04
Loss = 2.8075e-01, PNorm = 40.1121, GNorm = 0.9599, lr_0 = 7.2288e-04
Loss = 2.5951e-01, PNorm = 40.1332, GNorm = 4.4854, lr_0 = 7.2663e-04
Loss = 2.1774e-01, PNorm = 40.1545, GNorm = 4.8329, lr_0 = 7.3038e-04
Loss = 2.7896e-01, PNorm = 40.1757, GNorm = 2.6242, lr_0 = 7.3413e-04
Loss = 2.2613e-01, PNorm = 40.1932, GNorm = 4.1792, lr_0 = 7.3788e-04
Loss = 1.9954e-01, PNorm = 40.2026, GNorm = 1.3859, lr_0 = 7.4163e-04
Loss = 2.2449e-01, PNorm = 40.2229, GNorm = 1.2816, lr_0 = 7.4538e-04
Loss = 2.2938e-01, PNorm = 40.2394, GNorm = 5.3249, lr_0 = 7.4913e-04
Loss = 2.3999e-01, PNorm = 40.2520, GNorm = 5.7363, lr_0 = 7.5288e-04
Loss = 2.4720e-01, PNorm = 40.2713, GNorm = 1.5389, lr_0 = 7.5663e-04
Loss = 2.4317e-01, PNorm = 40.2940, GNorm = 3.6751, lr_0 = 7.6038e-04
Loss = 2.3194e-01, PNorm = 40.3198, GNorm = 1.6949, lr_0 = 7.6413e-04
Loss = 2.3194e-01, PNorm = 40.3419, GNorm = 6.8885, lr_0 = 7.6788e-04
Loss = 2.3241e-01, PNorm = 40.3645, GNorm = 1.0305, lr_0 = 7.7163e-04
Loss = 2.7987e-01, PNorm = 40.3838, GNorm = 10.4682, lr_0 = 7.7538e-04
Loss = 2.8813e-01, PNorm = 40.4026, GNorm = 7.1143, lr_0 = 7.7913e-04
Loss = 2.5672e-01, PNorm = 40.4315, GNorm = 6.3397, lr_0 = 7.8288e-04
Loss = 2.9089e-01, PNorm = 40.4608, GNorm = 4.6488, lr_0 = 7.8663e-04
Loss = 2.6093e-01, PNorm = 40.4812, GNorm = 1.6651, lr_0 = 7.9038e-04
Loss = 2.3201e-01, PNorm = 40.5074, GNorm = 7.2599, lr_0 = 7.9413e-04
Loss = 2.4010e-01, PNorm = 40.5254, GNorm = 3.3723, lr_0 = 7.9788e-04
Loss = 2.6973e-01, PNorm = 40.5455, GNorm = 6.1159, lr_0 = 8.0163e-04
Loss = 2.1294e-01, PNorm = 40.5756, GNorm = 1.6754, lr_0 = 8.0538e-04
Loss = 2.5101e-01, PNorm = 40.6031, GNorm = 6.0151, lr_0 = 8.0913e-04
Loss = 2.3552e-01, PNorm = 40.6265, GNorm = 2.1367, lr_0 = 8.1288e-04
Loss = 2.1039e-01, PNorm = 40.6493, GNorm = 4.7103, lr_0 = 8.1663e-04
Loss = 2.1797e-01, PNorm = 40.6702, GNorm = 7.1651, lr_0 = 8.2038e-04
Loss = 2.2796e-01, PNorm = 40.6883, GNorm = 2.0906, lr_0 = 8.2413e-04
Loss = 2.0654e-01, PNorm = 40.7078, GNorm = 6.6485, lr_0 = 8.2788e-04
Loss = 2.2036e-01, PNorm = 40.7291, GNorm = 3.4649, lr_0 = 8.3163e-04
Loss = 2.3508e-01, PNorm = 40.7451, GNorm = 4.0143, lr_0 = 8.3538e-04
Loss = 2.0505e-01, PNorm = 40.7631, GNorm = 3.0530, lr_0 = 8.3913e-04
Loss = 1.8716e-01, PNorm = 40.7820, GNorm = 2.8544, lr_0 = 8.4288e-04
Loss = 2.0841e-01, PNorm = 40.8041, GNorm = 0.9766, lr_0 = 8.4663e-04
Loss = 1.8890e-01, PNorm = 40.8311, GNorm = 2.6571, lr_0 = 8.5038e-04
Loss = 2.2426e-01, PNorm = 40.8555, GNorm = 2.5914, lr_0 = 8.5413e-04
Loss = 2.7702e-01, PNorm = 40.8805, GNorm = 3.0417, lr_0 = 8.5788e-04
Loss = 2.3958e-01, PNorm = 40.9110, GNorm = 1.0051, lr_0 = 8.6163e-04
Loss = 2.4180e-01, PNorm = 40.9382, GNorm = 4.9801, lr_0 = 8.6538e-04
Loss = 2.0944e-01, PNorm = 40.9614, GNorm = 3.5358, lr_0 = 8.6913e-04
Loss = 2.5332e-01, PNorm = 40.9776, GNorm = 4.1438, lr_0 = 8.7288e-04
Loss = 2.8136e-01, PNorm = 41.0037, GNorm = 2.3927, lr_0 = 8.7663e-04
Loss = 3.0080e-01, PNorm = 41.0276, GNorm = 2.8849, lr_0 = 8.8038e-04
Loss = 3.1365e-01, PNorm = 41.0690, GNorm = 6.2319, lr_0 = 8.8413e-04
Loss = 2.5337e-01, PNorm = 41.1025, GNorm = 1.9741, lr_0 = 8.8788e-04
Loss = 2.4396e-01, PNorm = 41.1289, GNorm = 1.5883, lr_0 = 8.9163e-04
Loss = 2.5391e-01, PNorm = 41.1574, GNorm = 8.5451, lr_0 = 8.9538e-04
Loss = 2.0903e-01, PNorm = 41.1793, GNorm = 3.4102, lr_0 = 8.9913e-04
Loss = 2.7988e-01, PNorm = 41.2113, GNorm = 3.4679, lr_0 = 9.0288e-04
Loss = 2.6393e-01, PNorm = 41.2423, GNorm = 9.9332, lr_0 = 9.0663e-04
Loss = 2.2828e-01, PNorm = 41.2719, GNorm = 2.8720, lr_0 = 9.1038e-04
Loss = 2.3111e-01, PNorm = 41.3011, GNorm = 6.0367, lr_0 = 9.1413e-04
Loss = 2.4188e-01, PNorm = 41.3151, GNorm = 3.9479, lr_0 = 9.1788e-04
Loss = 3.0524e-01, PNorm = 41.3352, GNorm = 6.4320, lr_0 = 9.2163e-04
Loss = 2.2940e-01, PNorm = 41.3673, GNorm = 2.1708, lr_0 = 9.2538e-04
Loss = 2.3342e-01, PNorm = 41.3913, GNorm = 3.6293, lr_0 = 9.2913e-04
Loss = 2.7370e-01, PNorm = 41.4301, GNorm = 7.9263, lr_0 = 9.3288e-04
Loss = 2.1141e-01, PNorm = 41.4714, GNorm = 3.0377, lr_0 = 9.3663e-04
Loss = 2.1626e-01, PNorm = 41.4894, GNorm = 4.4220, lr_0 = 9.4038e-04
Loss = 2.6096e-01, PNorm = 41.5048, GNorm = 2.9217, lr_0 = 9.4413e-04
Loss = 2.2702e-01, PNorm = 41.5290, GNorm = 5.3813, lr_0 = 9.4788e-04
Loss = 2.6076e-01, PNorm = 41.5576, GNorm = 4.4623, lr_0 = 9.5163e-04
Loss = 2.6035e-01, PNorm = 41.5801, GNorm = 3.4901, lr_0 = 9.5538e-04
Loss = 2.6883e-01, PNorm = 41.6139, GNorm = 4.4689, lr_0 = 9.5913e-04
Loss = 2.2382e-01, PNorm = 41.6501, GNorm = 0.7063, lr_0 = 9.6288e-04
Loss = 2.1639e-01, PNorm = 41.6701, GNorm = 4.6159, lr_0 = 9.6663e-04
Loss = 2.3534e-01, PNorm = 41.6869, GNorm = 1.0270, lr_0 = 9.7038e-04
Loss = 2.5040e-01, PNorm = 41.7172, GNorm = 3.5984, lr_0 = 9.7413e-04
Loss = 2.4393e-01, PNorm = 41.7481, GNorm = 3.2418, lr_0 = 9.7788e-04
Loss = 2.2736e-01, PNorm = 41.7794, GNorm = 0.9094, lr_0 = 9.8163e-04
Loss = 2.1938e-01, PNorm = 41.8112, GNorm = 1.5123, lr_0 = 9.8537e-04
Loss = 2.3303e-01, PNorm = 41.8355, GNorm = 4.7290, lr_0 = 9.8912e-04
Loss = 2.0997e-01, PNorm = 41.8604, GNorm = 3.2221, lr_0 = 9.9288e-04
Loss = 2.1493e-01, PNorm = 41.8846, GNorm = 2.7832, lr_0 = 9.9663e-04
Loss = 2.4382e-01, PNorm = 41.9045, GNorm = 8.4841, lr_0 = 9.9993e-04
Validation mae = 0.524244
Epoch 2
Loss = 2.3853e-01, PNorm = 41.9432, GNorm = 1.6490, lr_0 = 9.9925e-04
Loss = 2.0327e-01, PNorm = 41.9821, GNorm = 3.0277, lr_0 = 9.9856e-04
Loss = 2.2013e-01, PNorm = 42.0011, GNorm = 2.5188, lr_0 = 9.9788e-04
Loss = 1.8874e-01, PNorm = 42.0241, GNorm = 1.4271, lr_0 = 9.9719e-04
Loss = 2.1714e-01, PNorm = 42.0415, GNorm = 1.8398, lr_0 = 9.9651e-04
Loss = 2.4495e-01, PNorm = 42.0743, GNorm = 4.3041, lr_0 = 9.9583e-04
Loss = 1.7676e-01, PNorm = 42.1007, GNorm = 2.1135, lr_0 = 9.9515e-04
Loss = 1.9904e-01, PNorm = 42.1334, GNorm = 1.1675, lr_0 = 9.9446e-04
Loss = 2.2829e-01, PNorm = 42.1535, GNorm = 1.8665, lr_0 = 9.9378e-04
Loss = 1.8760e-01, PNorm = 42.1719, GNorm = 4.2971, lr_0 = 9.9310e-04
Loss = 2.2544e-01, PNorm = 42.2042, GNorm = 5.0221, lr_0 = 9.9242e-04
Loss = 2.2016e-01, PNorm = 42.2417, GNorm = 2.6312, lr_0 = 9.9174e-04
Loss = 2.0573e-01, PNorm = 42.2639, GNorm = 4.7182, lr_0 = 9.9106e-04
Loss = 2.1084e-01, PNorm = 42.2893, GNorm = 0.7465, lr_0 = 9.9038e-04
Loss = 2.4212e-01, PNorm = 42.3240, GNorm = 0.8807, lr_0 = 9.8971e-04
Loss = 2.2078e-01, PNorm = 42.3660, GNorm = 3.6532, lr_0 = 9.8903e-04
Loss = 2.3572e-01, PNorm = 42.3918, GNorm = 0.7478, lr_0 = 9.8835e-04
Loss = 2.0211e-01, PNorm = 42.4261, GNorm = 4.3137, lr_0 = 9.8767e-04
Loss = 2.5686e-01, PNorm = 42.4663, GNorm = 2.1946, lr_0 = 9.8700e-04
Loss = 2.0764e-01, PNorm = 42.5030, GNorm = 2.6791, lr_0 = 9.8632e-04
Loss = 1.9515e-01, PNorm = 42.5295, GNorm = 1.4435, lr_0 = 9.8564e-04
Loss = 2.0610e-01, PNorm = 42.5481, GNorm = 1.9726, lr_0 = 9.8497e-04
Loss = 2.1075e-01, PNorm = 42.5777, GNorm = 1.8159, lr_0 = 9.8429e-04
Loss = 2.1402e-01, PNorm = 42.6055, GNorm = 1.4355, lr_0 = 9.8362e-04
Loss = 2.1540e-01, PNorm = 42.6306, GNorm = 3.6360, lr_0 = 9.8295e-04
Loss = 2.3238e-01, PNorm = 42.6561, GNorm = 3.9621, lr_0 = 9.8227e-04
Loss = 2.3150e-01, PNorm = 42.6888, GNorm = 4.0916, lr_0 = 9.8160e-04
Loss = 2.0358e-01, PNorm = 42.7329, GNorm = 0.8953, lr_0 = 9.8093e-04
Loss = 1.9472e-01, PNorm = 42.7643, GNorm = 1.8122, lr_0 = 9.8026e-04
Loss = 1.8128e-01, PNorm = 42.7825, GNorm = 0.9760, lr_0 = 9.7958e-04
Loss = 2.2372e-01, PNorm = 42.8040, GNorm = 3.1185, lr_0 = 9.7891e-04
Loss = 1.9083e-01, PNorm = 42.8393, GNorm = 1.1660, lr_0 = 9.7824e-04
Loss = 2.0939e-01, PNorm = 42.8702, GNorm = 1.5463, lr_0 = 9.7757e-04
Loss = 1.9705e-01, PNorm = 42.8905, GNorm = 1.6776, lr_0 = 9.7690e-04
Loss = 2.1509e-01, PNorm = 42.9223, GNorm = 1.2761, lr_0 = 9.7623e-04
Loss = 2.0072e-01, PNorm = 42.9424, GNorm = 5.4666, lr_0 = 9.7556e-04
Loss = 2.0314e-01, PNorm = 42.9564, GNorm = 3.1447, lr_0 = 9.7490e-04
Loss = 1.9641e-01, PNorm = 42.9761, GNorm = 3.4260, lr_0 = 9.7423e-04
Loss = 2.2109e-01, PNorm = 43.0010, GNorm = 1.7485, lr_0 = 9.7356e-04
Loss = 1.9189e-01, PNorm = 43.0281, GNorm = 1.6088, lr_0 = 9.7289e-04
Loss = 1.9230e-01, PNorm = 43.0538, GNorm = 2.6901, lr_0 = 9.7223e-04
Loss = 2.0512e-01, PNorm = 43.0712, GNorm = 0.8504, lr_0 = 9.7156e-04
Loss = 1.8602e-01, PNorm = 43.0853, GNorm = 1.7108, lr_0 = 9.7090e-04
Loss = 1.7652e-01, PNorm = 43.1088, GNorm = 0.6856, lr_0 = 9.7023e-04
Loss = 1.9551e-01, PNorm = 43.1406, GNorm = 1.3423, lr_0 = 9.6957e-04
Loss = 2.0815e-01, PNorm = 43.1738, GNorm = 3.3589, lr_0 = 9.6890e-04
Loss = 1.5108e-01, PNorm = 43.1978, GNorm = 1.9513, lr_0 = 9.6824e-04
Loss = 2.5096e-01, PNorm = 43.2273, GNorm = 2.8997, lr_0 = 9.6757e-04
Loss = 1.9854e-01, PNorm = 43.2610, GNorm = 5.2247, lr_0 = 9.6691e-04
Loss = 2.2015e-01, PNorm = 43.2939, GNorm = 2.9178, lr_0 = 9.6625e-04
Loss = 1.9243e-01, PNorm = 43.3321, GNorm = 1.0153, lr_0 = 9.6559e-04
Loss = 2.0040e-01, PNorm = 43.3567, GNorm = 1.1805, lr_0 = 9.6493e-04
Loss = 2.2124e-01, PNorm = 43.3811, GNorm = 2.4829, lr_0 = 9.6427e-04
Loss = 1.9710e-01, PNorm = 43.4021, GNorm = 1.5130, lr_0 = 9.6360e-04
Loss = 2.1280e-01, PNorm = 43.4320, GNorm = 2.5141, lr_0 = 9.6294e-04
Loss = 2.1494e-01, PNorm = 43.4540, GNorm = 2.9601, lr_0 = 9.6228e-04
Loss = 2.1462e-01, PNorm = 43.4679, GNorm = 0.8428, lr_0 = 9.6163e-04
Loss = 1.9855e-01, PNorm = 43.4926, GNorm = 4.4119, lr_0 = 9.6097e-04
Loss = 1.7326e-01, PNorm = 43.5165, GNorm = 3.2950, lr_0 = 9.6031e-04
Loss = 1.5631e-01, PNorm = 43.5384, GNorm = 1.0430, lr_0 = 9.5965e-04
Loss = 1.6930e-01, PNorm = 43.5567, GNorm = 0.8648, lr_0 = 9.5899e-04
Loss = 1.9353e-01, PNorm = 43.5827, GNorm = 1.5938, lr_0 = 9.5834e-04
Loss = 1.8296e-01, PNorm = 43.6074, GNorm = 1.8122, lr_0 = 9.5768e-04
Loss = 1.7744e-01, PNorm = 43.6345, GNorm = 0.5783, lr_0 = 9.5702e-04
Loss = 1.7789e-01, PNorm = 43.6592, GNorm = 1.7899, lr_0 = 9.5637e-04
Loss = 1.9541e-01, PNorm = 43.6793, GNorm = 2.3932, lr_0 = 9.5571e-04
Loss = 2.0167e-01, PNorm = 43.6973, GNorm = 1.2776, lr_0 = 9.5506e-04
Loss = 2.0010e-01, PNorm = 43.7259, GNorm = 1.1743, lr_0 = 9.5440e-04
Loss = 2.0056e-01, PNorm = 43.7531, GNorm = 1.4092, lr_0 = 9.5375e-04
Loss = 1.8047e-01, PNorm = 43.7720, GNorm = 2.7026, lr_0 = 9.5310e-04
Loss = 1.7191e-01, PNorm = 43.7982, GNorm = 3.6104, lr_0 = 9.5244e-04
Loss = 1.9959e-01, PNorm = 43.8147, GNorm = 2.2385, lr_0 = 9.5179e-04
Loss = 2.3069e-01, PNorm = 43.8355, GNorm = 1.4643, lr_0 = 9.5114e-04
Loss = 2.0086e-01, PNorm = 43.8650, GNorm = 3.2302, lr_0 = 9.5049e-04
Loss = 2.1000e-01, PNorm = 43.8931, GNorm = 1.5203, lr_0 = 9.4984e-04
Loss = 2.4981e-01, PNorm = 43.9206, GNorm = 4.9105, lr_0 = 9.4919e-04
Loss = 2.1882e-01, PNorm = 43.9519, GNorm = 1.8406, lr_0 = 9.4854e-04
Loss = 1.8333e-01, PNorm = 43.9781, GNorm = 1.0086, lr_0 = 9.4789e-04
Loss = 1.9211e-01, PNorm = 44.0012, GNorm = 4.3827, lr_0 = 9.4724e-04
Loss = 1.9041e-01, PNorm = 44.0337, GNorm = 1.4004, lr_0 = 9.4659e-04
Loss = 2.0863e-01, PNorm = 44.0687, GNorm = 1.8636, lr_0 = 9.4594e-04
Loss = 2.1886e-01, PNorm = 44.0924, GNorm = 4.1447, lr_0 = 9.4529e-04
Loss = 2.0167e-01, PNorm = 44.1075, GNorm = 1.2796, lr_0 = 9.4464e-04
Loss = 1.9449e-01, PNorm = 44.1319, GNorm = 2.0537, lr_0 = 9.4400e-04
Loss = 1.8748e-01, PNorm = 44.1588, GNorm = 0.7423, lr_0 = 9.4335e-04
Loss = 1.7490e-01, PNorm = 44.1850, GNorm = 3.1655, lr_0 = 9.4270e-04
Loss = 1.9750e-01, PNorm = 44.2119, GNorm = 2.3090, lr_0 = 9.4206e-04
Loss = 2.0616e-01, PNorm = 44.2373, GNorm = 1.7075, lr_0 = 9.4141e-04
Loss = 2.4665e-01, PNorm = 44.2641, GNorm = 0.9899, lr_0 = 9.4077e-04
Loss = 2.0111e-01, PNorm = 44.2936, GNorm = 1.2534, lr_0 = 9.4012e-04
Loss = 1.8228e-01, PNorm = 44.3228, GNorm = 1.4006, lr_0 = 9.3948e-04
Loss = 1.9014e-01, PNorm = 44.3574, GNorm = 2.5888, lr_0 = 9.3884e-04
Loss = 1.7741e-01, PNorm = 44.3891, GNorm = 2.7321, lr_0 = 9.3819e-04
Loss = 1.8584e-01, PNorm = 44.4165, GNorm = 1.1596, lr_0 = 9.3755e-04
Loss = 2.0068e-01, PNorm = 44.4398, GNorm = 1.2663, lr_0 = 9.3691e-04
Loss = 1.7394e-01, PNorm = 44.4628, GNorm = 0.8945, lr_0 = 9.3627e-04
Loss = 2.1772e-01, PNorm = 44.4873, GNorm = 0.6934, lr_0 = 9.3562e-04
Loss = 2.1287e-01, PNorm = 44.5082, GNorm = 6.2570, lr_0 = 9.3498e-04
Loss = 2.1876e-01, PNorm = 44.5411, GNorm = 1.9362, lr_0 = 9.3434e-04
Loss = 1.9345e-01, PNorm = 44.5705, GNorm = 2.5536, lr_0 = 9.3370e-04
Loss = 1.8947e-01, PNorm = 44.5935, GNorm = 1.4844, lr_0 = 9.3306e-04
Loss = 1.8304e-01, PNorm = 44.6113, GNorm = 1.9770, lr_0 = 9.3242e-04
Loss = 1.7952e-01, PNorm = 44.6358, GNorm = 0.9681, lr_0 = 9.3178e-04
Loss = 1.8664e-01, PNorm = 44.6517, GNorm = 3.7166, lr_0 = 9.3115e-04
Loss = 2.1439e-01, PNorm = 44.6680, GNorm = 3.4176, lr_0 = 9.3051e-04
Loss = 2.1774e-01, PNorm = 44.6893, GNorm = 1.4134, lr_0 = 9.2987e-04
Loss = 2.0238e-01, PNorm = 44.7180, GNorm = 3.6009, lr_0 = 9.2923e-04
Loss = 1.8909e-01, PNorm = 44.7414, GNorm = 2.1115, lr_0 = 9.2860e-04
Loss = 2.2164e-01, PNorm = 44.7661, GNorm = 1.8544, lr_0 = 9.2796e-04
Loss = 1.6523e-01, PNorm = 44.7960, GNorm = 2.2955, lr_0 = 9.2733e-04
Loss = 1.6118e-01, PNorm = 44.8158, GNorm = 2.1298, lr_0 = 9.2669e-04
Loss = 1.7273e-01, PNorm = 44.8279, GNorm = 2.5684, lr_0 = 9.2606e-04
Loss = 1.7948e-01, PNorm = 44.8332, GNorm = 2.1922, lr_0 = 9.2542e-04
Loss = 1.4919e-01, PNorm = 44.8514, GNorm = 2.2237, lr_0 = 9.2479e-04
Loss = 1.8746e-01, PNorm = 44.8649, GNorm = 2.7898, lr_0 = 9.2415e-04
Loss = 2.1006e-01, PNorm = 44.8883, GNorm = 2.5076, lr_0 = 9.2352e-04
Loss = 1.8028e-01, PNorm = 44.9045, GNorm = 1.1077, lr_0 = 9.2289e-04
Loss = 2.0311e-01, PNorm = 44.9265, GNorm = 3.8332, lr_0 = 9.2226e-04
Loss = 1.8225e-01, PNorm = 44.9523, GNorm = 1.2020, lr_0 = 9.2162e-04
Loss = 1.7318e-01, PNorm = 44.9769, GNorm = 2.6553, lr_0 = 9.2099e-04
Validation mae = 0.471740
Epoch 3
Loss = 1.6636e-01, PNorm = 45.0054, GNorm = 0.8052, lr_0 = 9.2036e-04
Loss = 1.6224e-01, PNorm = 45.0334, GNorm = 1.1315, lr_0 = 9.1973e-04
Loss = 1.7209e-01, PNorm = 45.0591, GNorm = 2.5878, lr_0 = 9.1910e-04
Loss = 1.8906e-01, PNorm = 45.0821, GNorm = 0.8789, lr_0 = 9.1847e-04
Loss = 1.9778e-01, PNorm = 45.1106, GNorm = 2.9856, lr_0 = 9.1784e-04
Loss = 1.8867e-01, PNorm = 45.1429, GNorm = 2.5343, lr_0 = 9.1721e-04
Loss = 2.1035e-01, PNorm = 45.1752, GNorm = 0.7852, lr_0 = 9.1658e-04
Loss = 1.8531e-01, PNorm = 45.2119, GNorm = 1.6488, lr_0 = 9.1596e-04
Loss = 1.7455e-01, PNorm = 45.2550, GNorm = 0.6153, lr_0 = 9.1533e-04
Loss = 1.8515e-01, PNorm = 45.2831, GNorm = 1.4064, lr_0 = 9.1470e-04
Loss = 1.7886e-01, PNorm = 45.3065, GNorm = 1.7980, lr_0 = 9.1408e-04
Loss = 1.5480e-01, PNorm = 45.3261, GNorm = 2.1982, lr_0 = 9.1345e-04
Loss = 1.5950e-01, PNorm = 45.3424, GNorm = 1.6112, lr_0 = 9.1282e-04
Loss = 1.8256e-01, PNorm = 45.3665, GNorm = 5.4875, lr_0 = 9.1220e-04
Loss = 1.5562e-01, PNorm = 45.3931, GNorm = 1.4378, lr_0 = 9.1157e-04
Loss = 1.9329e-01, PNorm = 45.4137, GNorm = 1.6901, lr_0 = 9.1095e-04
Loss = 2.1080e-01, PNorm = 45.4402, GNorm = 3.4702, lr_0 = 9.1032e-04
Loss = 2.4691e-01, PNorm = 45.4730, GNorm = 3.7138, lr_0 = 9.0970e-04
Loss = 1.7976e-01, PNorm = 45.5113, GNorm = 3.7204, lr_0 = 9.0908e-04
Loss = 1.7807e-01, PNorm = 45.5372, GNorm = 2.1400, lr_0 = 9.0846e-04
Loss = 1.5139e-01, PNorm = 45.5605, GNorm = 1.9351, lr_0 = 9.0783e-04
Loss = 1.6073e-01, PNorm = 45.5784, GNorm = 1.4721, lr_0 = 9.0721e-04
Loss = 1.7887e-01, PNorm = 45.6055, GNorm = 2.5422, lr_0 = 9.0659e-04
Loss = 1.9839e-01, PNorm = 45.6310, GNorm = 0.8135, lr_0 = 9.0597e-04
Loss = 2.0260e-01, PNorm = 45.6560, GNorm = 3.8163, lr_0 = 9.0535e-04
Loss = 1.8363e-01, PNorm = 45.6788, GNorm = 1.0954, lr_0 = 9.0473e-04
Loss = 1.8284e-01, PNorm = 45.7127, GNorm = 2.0479, lr_0 = 9.0411e-04
Loss = 1.5583e-01, PNorm = 45.7404, GNorm = 0.6746, lr_0 = 9.0349e-04
Loss = 1.8178e-01, PNorm = 45.7619, GNorm = 1.3297, lr_0 = 9.0287e-04
Loss = 1.9993e-01, PNorm = 45.7815, GNorm = 3.3389, lr_0 = 9.0225e-04
Loss = 2.1465e-01, PNorm = 45.8137, GNorm = 1.0609, lr_0 = 9.0163e-04
Loss = 2.2099e-01, PNorm = 45.8417, GNorm = 2.6410, lr_0 = 9.0102e-04
Loss = 1.7737e-01, PNorm = 45.8659, GNorm = 1.1811, lr_0 = 9.0040e-04
Loss = 1.7301e-01, PNorm = 45.8903, GNorm = 1.9969, lr_0 = 8.9978e-04
Loss = 1.8569e-01, PNorm = 45.9176, GNorm = 0.9052, lr_0 = 8.9916e-04
Loss = 1.9724e-01, PNorm = 45.9472, GNorm = 0.9235, lr_0 = 8.9855e-04
Loss = 1.8105e-01, PNorm = 45.9786, GNorm = 1.5649, lr_0 = 8.9793e-04
Loss = 1.7921e-01, PNorm = 46.0038, GNorm = 2.8568, lr_0 = 8.9732e-04
Loss = 1.6738e-01, PNorm = 46.0322, GNorm = 0.8159, lr_0 = 8.9670e-04
Loss = 1.8555e-01, PNorm = 46.0633, GNorm = 0.7408, lr_0 = 8.9609e-04
Loss = 1.8546e-01, PNorm = 46.0899, GNorm = 0.8460, lr_0 = 8.9548e-04
Loss = 1.4930e-01, PNorm = 46.1098, GNorm = 0.9272, lr_0 = 8.9486e-04
Loss = 1.5640e-01, PNorm = 46.1317, GNorm = 1.0081, lr_0 = 8.9425e-04
Loss = 1.5088e-01, PNorm = 46.1528, GNorm = 1.3924, lr_0 = 8.9364e-04
Loss = 1.4421e-01, PNorm = 46.1682, GNorm = 2.1073, lr_0 = 8.9302e-04
Loss = 1.5455e-01, PNorm = 46.1790, GNorm = 2.6093, lr_0 = 8.9241e-04
Loss = 1.6877e-01, PNorm = 46.1982, GNorm = 1.8713, lr_0 = 8.9180e-04
Loss = 1.7686e-01, PNorm = 46.2166, GNorm = 0.8490, lr_0 = 8.9119e-04
Loss = 1.6851e-01, PNorm = 46.2364, GNorm = 1.4950, lr_0 = 8.9058e-04
Loss = 1.8571e-01, PNorm = 46.2564, GNorm = 1.0110, lr_0 = 8.8997e-04
Loss = 1.6256e-01, PNorm = 46.2679, GNorm = 1.3281, lr_0 = 8.8936e-04
Loss = 1.8287e-01, PNorm = 46.2857, GNorm = 0.8455, lr_0 = 8.8875e-04
Loss = 1.6991e-01, PNorm = 46.3138, GNorm = 1.0725, lr_0 = 8.8814e-04
Loss = 1.6896e-01, PNorm = 46.3379, GNorm = 4.5627, lr_0 = 8.8753e-04
Loss = 1.6814e-01, PNorm = 46.3587, GNorm = 2.1265, lr_0 = 8.8693e-04
Loss = 1.9358e-01, PNorm = 46.3785, GNorm = 2.1509, lr_0 = 8.8632e-04
Loss = 1.8312e-01, PNorm = 46.3994, GNorm = 2.4984, lr_0 = 8.8571e-04
Loss = 1.5980e-01, PNorm = 46.4213, GNorm = 0.7910, lr_0 = 8.8510e-04
Loss = 1.6854e-01, PNorm = 46.4470, GNorm = 2.8282, lr_0 = 8.8450e-04
Loss = 1.6227e-01, PNorm = 46.4694, GNorm = 2.5483, lr_0 = 8.8389e-04
Loss = 1.8015e-01, PNorm = 46.4917, GNorm = 0.8649, lr_0 = 8.8329e-04
Loss = 1.4087e-01, PNorm = 46.5139, GNorm = 1.0230, lr_0 = 8.8268e-04
Loss = 1.7264e-01, PNorm = 46.5282, GNorm = 1.7418, lr_0 = 8.8208e-04
Loss = 1.7874e-01, PNorm = 46.5439, GNorm = 1.9659, lr_0 = 8.8147e-04
Loss = 1.9996e-01, PNorm = 46.5641, GNorm = 1.3446, lr_0 = 8.8087e-04
Loss = 1.6353e-01, PNorm = 46.5896, GNorm = 1.2761, lr_0 = 8.8026e-04
Loss = 1.5648e-01, PNorm = 46.6171, GNorm = 1.8106, lr_0 = 8.7966e-04
Loss = 1.8174e-01, PNorm = 46.6338, GNorm = 1.7459, lr_0 = 8.7906e-04
Loss = 1.8615e-01, PNorm = 46.6598, GNorm = 0.6609, lr_0 = 8.7846e-04
Loss = 1.8207e-01, PNorm = 46.6857, GNorm = 1.1499, lr_0 = 8.7785e-04
Loss = 1.8738e-01, PNorm = 46.7095, GNorm = 1.7580, lr_0 = 8.7725e-04
Loss = 1.5952e-01, PNorm = 46.7285, GNorm = 1.6518, lr_0 = 8.7665e-04
Loss = 1.6652e-01, PNorm = 46.7497, GNorm = 1.8741, lr_0 = 8.7605e-04
Loss = 1.5399e-01, PNorm = 46.7712, GNorm = 0.8839, lr_0 = 8.7545e-04
Loss = 1.7184e-01, PNorm = 46.7918, GNorm = 1.3402, lr_0 = 8.7485e-04
Loss = 1.6444e-01, PNorm = 46.8107, GNorm = 2.0655, lr_0 = 8.7425e-04
Loss = 1.4489e-01, PNorm = 46.8340, GNorm = 3.1752, lr_0 = 8.7365e-04
Loss = 1.8950e-01, PNorm = 46.8578, GNorm = 1.6921, lr_0 = 8.7306e-04
Loss = 1.6823e-01, PNorm = 46.8695, GNorm = 2.5416, lr_0 = 8.7246e-04
Loss = 1.9278e-01, PNorm = 46.8859, GNorm = 1.0819, lr_0 = 8.7186e-04
Loss = 1.9933e-01, PNorm = 46.9118, GNorm = 1.7920, lr_0 = 8.7126e-04
Loss = 1.7961e-01, PNorm = 46.9490, GNorm = 1.5653, lr_0 = 8.7067e-04
Loss = 1.7648e-01, PNorm = 46.9684, GNorm = 3.2423, lr_0 = 8.7007e-04
Loss = 1.5077e-01, PNorm = 46.9884, GNorm = 1.3091, lr_0 = 8.6947e-04
Loss = 1.5514e-01, PNorm = 47.0146, GNorm = 3.1332, lr_0 = 8.6888e-04
Loss = 1.6932e-01, PNorm = 47.0419, GNorm = 1.3219, lr_0 = 8.6828e-04
Loss = 1.6072e-01, PNorm = 47.0691, GNorm = 0.9398, lr_0 = 8.6769e-04
Loss = 1.4516e-01, PNorm = 47.0883, GNorm = 1.3064, lr_0 = 8.6709e-04
Loss = 1.6494e-01, PNorm = 47.1099, GNorm = 2.5998, lr_0 = 8.6650e-04
Loss = 1.8207e-01, PNorm = 47.1346, GNorm = 2.0199, lr_0 = 8.6590e-04
Loss = 1.6876e-01, PNorm = 47.1586, GNorm = 0.7108, lr_0 = 8.6531e-04
Loss = 1.6512e-01, PNorm = 47.1818, GNorm = 1.6400, lr_0 = 8.6472e-04
Loss = 1.8230e-01, PNorm = 47.2055, GNorm = 1.2896, lr_0 = 8.6413e-04
Loss = 1.5540e-01, PNorm = 47.2385, GNorm = 0.8820, lr_0 = 8.6353e-04
Loss = 1.5670e-01, PNorm = 47.2544, GNorm = 1.4453, lr_0 = 8.6294e-04
Loss = 1.5693e-01, PNorm = 47.2609, GNorm = 0.9236, lr_0 = 8.6235e-04
Loss = 1.8340e-01, PNorm = 47.2778, GNorm = 3.3624, lr_0 = 8.6176e-04
Loss = 1.5683e-01, PNorm = 47.2926, GNorm = 0.7755, lr_0 = 8.6117e-04
Loss = 1.6611e-01, PNorm = 47.3151, GNorm = 0.5893, lr_0 = 8.6058e-04
Loss = 1.6896e-01, PNorm = 47.3330, GNorm = 1.6959, lr_0 = 8.5999e-04
Loss = 1.5644e-01, PNorm = 47.3477, GNorm = 1.3859, lr_0 = 8.5940e-04
Loss = 1.5815e-01, PNorm = 47.3585, GNorm = 1.8756, lr_0 = 8.5881e-04
Loss = 1.5588e-01, PNorm = 47.3767, GNorm = 1.5546, lr_0 = 8.5823e-04
Loss = 1.8626e-01, PNorm = 47.3957, GNorm = 1.6709, lr_0 = 8.5764e-04
Loss = 1.4700e-01, PNorm = 47.4256, GNorm = 1.5477, lr_0 = 8.5705e-04
Loss = 1.9765e-01, PNorm = 47.4522, GNorm = 3.0363, lr_0 = 8.5646e-04
Loss = 1.8034e-01, PNorm = 47.4795, GNorm = 2.0557, lr_0 = 8.5588e-04
Loss = 1.6688e-01, PNorm = 47.4976, GNorm = 0.9352, lr_0 = 8.5529e-04
Loss = 1.6542e-01, PNorm = 47.5119, GNorm = 1.8877, lr_0 = 8.5470e-04
Loss = 1.6515e-01, PNorm = 47.5268, GNorm = 2.5819, lr_0 = 8.5412e-04
Loss = 1.8849e-01, PNorm = 47.5523, GNorm = 1.5181, lr_0 = 8.5353e-04
Loss = 1.8607e-01, PNorm = 47.5795, GNorm = 2.5688, lr_0 = 8.5295e-04
Loss = 1.8099e-01, PNorm = 47.6046, GNorm = 1.0262, lr_0 = 8.5236e-04
Loss = 1.4496e-01, PNorm = 47.6349, GNorm = 2.0011, lr_0 = 8.5178e-04
Loss = 1.5892e-01, PNorm = 47.6501, GNorm = 1.0243, lr_0 = 8.5120e-04
Loss = 1.9130e-01, PNorm = 47.6622, GNorm = 2.9521, lr_0 = 8.5061e-04
Loss = 1.8006e-01, PNorm = 47.6872, GNorm = 1.7394, lr_0 = 8.5003e-04
Loss = 1.8387e-01, PNorm = 47.7135, GNorm = 1.3568, lr_0 = 8.4945e-04
Loss = 1.5311e-01, PNorm = 47.7292, GNorm = 0.9979, lr_0 = 8.4887e-04
Loss = 1.5181e-01, PNorm = 47.7463, GNorm = 1.1405, lr_0 = 8.4828e-04
Validation mae = 0.451300
Epoch 4
Loss = 1.4594e-01, PNorm = 47.7626, GNorm = 0.6532, lr_0 = 8.4770e-04
Loss = 1.4966e-01, PNorm = 47.7860, GNorm = 0.7407, lr_0 = 8.4712e-04
Loss = 1.8089e-01, PNorm = 47.8078, GNorm = 0.5835, lr_0 = 8.4654e-04
Loss = 1.2766e-01, PNorm = 47.8271, GNorm = 0.6488, lr_0 = 8.4596e-04
Loss = 1.6860e-01, PNorm = 47.8484, GNorm = 0.8099, lr_0 = 8.4538e-04
Loss = 1.5803e-01, PNorm = 47.8698, GNorm = 1.1565, lr_0 = 8.4480e-04
Loss = 1.4439e-01, PNorm = 47.8885, GNorm = 1.2770, lr_0 = 8.4423e-04
Loss = 1.6994e-01, PNorm = 47.9169, GNorm = 1.2150, lr_0 = 8.4365e-04
Loss = 1.5355e-01, PNorm = 47.9346, GNorm = 1.5432, lr_0 = 8.4307e-04
Loss = 1.5578e-01, PNorm = 47.9560, GNorm = 1.2062, lr_0 = 8.4249e-04
Loss = 1.3635e-01, PNorm = 47.9785, GNorm = 1.2225, lr_0 = 8.4191e-04
Loss = 1.6787e-01, PNorm = 47.9948, GNorm = 1.3032, lr_0 = 8.4134e-04
Loss = 1.3006e-01, PNorm = 48.0131, GNorm = 0.8755, lr_0 = 8.4076e-04
Loss = 1.6195e-01, PNorm = 48.0328, GNorm = 2.2568, lr_0 = 8.4019e-04
Loss = 1.4686e-01, PNorm = 48.0512, GNorm = 2.6759, lr_0 = 8.3961e-04
Loss = 1.6523e-01, PNorm = 48.0673, GNorm = 1.3082, lr_0 = 8.3903e-04
Loss = 1.4460e-01, PNorm = 48.0903, GNorm = 0.9478, lr_0 = 8.3846e-04
Loss = 1.3542e-01, PNorm = 48.1091, GNorm = 2.6945, lr_0 = 8.3789e-04
Loss = 1.6860e-01, PNorm = 48.1242, GNorm = 1.0358, lr_0 = 8.3731e-04
Loss = 1.5136e-01, PNorm = 48.1529, GNorm = 2.7859, lr_0 = 8.3674e-04
Loss = 1.5405e-01, PNorm = 48.1802, GNorm = 1.1530, lr_0 = 8.3616e-04
Loss = 1.5154e-01, PNorm = 48.2018, GNorm = 1.0165, lr_0 = 8.3559e-04
Loss = 1.3627e-01, PNorm = 48.2201, GNorm = 2.6203, lr_0 = 8.3502e-04
Loss = 1.4543e-01, PNorm = 48.2346, GNorm = 1.5645, lr_0 = 8.3445e-04
Loss = 1.2716e-01, PNorm = 48.2493, GNorm = 0.7917, lr_0 = 8.3388e-04
Loss = 1.4730e-01, PNorm = 48.2573, GNorm = 1.9425, lr_0 = 8.3330e-04
Loss = 1.6242e-01, PNorm = 48.2787, GNorm = 0.8716, lr_0 = 8.3273e-04
Loss = 1.4375e-01, PNorm = 48.3049, GNorm = 0.7596, lr_0 = 8.3216e-04
Loss = 1.4185e-01, PNorm = 48.3243, GNorm = 1.4417, lr_0 = 8.3159e-04
Loss = 1.6523e-01, PNorm = 48.3444, GNorm = 2.1505, lr_0 = 8.3102e-04
Loss = 1.6727e-01, PNorm = 48.3675, GNorm = 2.9806, lr_0 = 8.3045e-04
Loss = 1.4386e-01, PNorm = 48.3937, GNorm = 1.1553, lr_0 = 8.2988e-04
Loss = 1.4529e-01, PNorm = 48.4200, GNorm = 1.0554, lr_0 = 8.2932e-04
Loss = 1.7536e-01, PNorm = 48.4494, GNorm = 1.2782, lr_0 = 8.2875e-04
Loss = 1.5574e-01, PNorm = 48.4739, GNorm = 1.2764, lr_0 = 8.2818e-04
Loss = 1.5721e-01, PNorm = 48.4923, GNorm = 1.4248, lr_0 = 8.2761e-04
Loss = 1.4692e-01, PNorm = 48.5194, GNorm = 0.8790, lr_0 = 8.2705e-04
Loss = 1.6245e-01, PNorm = 48.5440, GNorm = 2.4773, lr_0 = 8.2648e-04
Loss = 1.6362e-01, PNorm = 48.5448, GNorm = 1.4180, lr_0 = 8.2591e-04
Loss = 1.5243e-01, PNorm = 48.5568, GNorm = 1.1197, lr_0 = 8.2535e-04
Loss = 1.5555e-01, PNorm = 48.5714, GNorm = 0.9917, lr_0 = 8.2478e-04
Loss = 1.6528e-01, PNorm = 48.5844, GNorm = 0.7536, lr_0 = 8.2422e-04
Loss = 1.6801e-01, PNorm = 48.6062, GNorm = 0.7438, lr_0 = 8.2365e-04
Loss = 1.4979e-01, PNorm = 48.6246, GNorm = 0.7929, lr_0 = 8.2309e-04
Loss = 1.3861e-01, PNorm = 48.6415, GNorm = 1.2726, lr_0 = 8.2252e-04
Loss = 1.4702e-01, PNorm = 48.6606, GNorm = 2.0996, lr_0 = 8.2196e-04
Loss = 1.4775e-01, PNorm = 48.6841, GNorm = 1.8344, lr_0 = 8.2140e-04
Loss = 1.4380e-01, PNorm = 48.7023, GNorm = 2.0459, lr_0 = 8.2084e-04
Loss = 1.4122e-01, PNorm = 48.7214, GNorm = 1.0892, lr_0 = 8.2027e-04
Loss = 1.4773e-01, PNorm = 48.7455, GNorm = 1.7674, lr_0 = 8.1971e-04
Loss = 1.4985e-01, PNorm = 48.7740, GNorm = 2.0372, lr_0 = 8.1915e-04
Loss = 1.4151e-01, PNorm = 48.8016, GNorm = 0.7027, lr_0 = 8.1859e-04
Loss = 1.3622e-01, PNorm = 48.8224, GNorm = 2.4139, lr_0 = 8.1803e-04
Loss = 1.4954e-01, PNorm = 48.8397, GNorm = 0.8270, lr_0 = 8.1747e-04
Loss = 1.6135e-01, PNorm = 48.8510, GNorm = 1.1665, lr_0 = 8.1691e-04
Loss = 1.4283e-01, PNorm = 48.8794, GNorm = 1.0046, lr_0 = 8.1635e-04
Loss = 1.4389e-01, PNorm = 48.9023, GNorm = 0.8546, lr_0 = 8.1579e-04
Loss = 1.7256e-01, PNorm = 48.9214, GNorm = 0.7914, lr_0 = 8.1523e-04
Loss = 1.3969e-01, PNorm = 48.9375, GNorm = 0.8897, lr_0 = 8.1467e-04
Loss = 1.6482e-01, PNorm = 48.9612, GNorm = 2.3655, lr_0 = 8.1411e-04
Loss = 1.6833e-01, PNorm = 48.9868, GNorm = 2.1544, lr_0 = 8.1355e-04
Loss = 1.5483e-01, PNorm = 49.0017, GNorm = 0.6919, lr_0 = 8.1300e-04
Loss = 1.4153e-01, PNorm = 49.0073, GNorm = 1.6732, lr_0 = 8.1244e-04
Loss = 1.4792e-01, PNorm = 49.0237, GNorm = 1.0029, lr_0 = 8.1188e-04
Loss = 1.4675e-01, PNorm = 49.0472, GNorm = 0.5745, lr_0 = 8.1133e-04
Loss = 1.3838e-01, PNorm = 49.0700, GNorm = 1.5197, lr_0 = 8.1077e-04
Loss = 1.5862e-01, PNorm = 49.0875, GNorm = 0.8816, lr_0 = 8.1022e-04
Loss = 1.4292e-01, PNorm = 49.1070, GNorm = 1.6687, lr_0 = 8.0966e-04
Loss = 1.4137e-01, PNorm = 49.1192, GNorm = 0.9997, lr_0 = 8.0911e-04
Loss = 1.6488e-01, PNorm = 49.1330, GNorm = 2.0532, lr_0 = 8.0855e-04
Loss = 1.8180e-01, PNorm = 49.1556, GNorm = 1.8179, lr_0 = 8.0800e-04
Loss = 1.8249e-01, PNorm = 49.1857, GNorm = 1.3287, lr_0 = 8.0745e-04
Loss = 1.4296e-01, PNorm = 49.2144, GNorm = 0.7246, lr_0 = 8.0689e-04
Loss = 1.4763e-01, PNorm = 49.2357, GNorm = 0.6906, lr_0 = 8.0634e-04
Loss = 1.8082e-01, PNorm = 49.2543, GNorm = 2.5739, lr_0 = 8.0579e-04
Loss = 1.6863e-01, PNorm = 49.2779, GNorm = 2.3492, lr_0 = 8.0523e-04
Loss = 1.4805e-01, PNorm = 49.3061, GNorm = 1.4375, lr_0 = 8.0468e-04
Loss = 1.4053e-01, PNorm = 49.3257, GNorm = 0.9789, lr_0 = 8.0413e-04
Loss = 1.4636e-01, PNorm = 49.3432, GNorm = 0.8098, lr_0 = 8.0358e-04
Loss = 1.9175e-01, PNorm = 49.3607, GNorm = 1.3216, lr_0 = 8.0303e-04
Loss = 1.5485e-01, PNorm = 49.3728, GNorm = 0.8555, lr_0 = 8.0248e-04
Loss = 1.5036e-01, PNorm = 49.3856, GNorm = 0.9738, lr_0 = 8.0193e-04
Loss = 1.5748e-01, PNorm = 49.3996, GNorm = 1.4899, lr_0 = 8.0138e-04
Loss = 1.4056e-01, PNorm = 49.4216, GNorm = 1.5009, lr_0 = 8.0083e-04
Loss = 1.5794e-01, PNorm = 49.4350, GNorm = 1.6496, lr_0 = 8.0028e-04
Loss = 1.6318e-01, PNorm = 49.4492, GNorm = 0.7235, lr_0 = 7.9974e-04
Loss = 1.5781e-01, PNorm = 49.4684, GNorm = 0.6758, lr_0 = 7.9919e-04
Loss = 1.5318e-01, PNorm = 49.4865, GNorm = 1.0303, lr_0 = 7.9864e-04
Loss = 1.6569e-01, PNorm = 49.5074, GNorm = 1.0914, lr_0 = 7.9809e-04
Loss = 1.8091e-01, PNorm = 49.5271, GNorm = 1.3010, lr_0 = 7.9755e-04
Loss = 1.4120e-01, PNorm = 49.5595, GNorm = 0.9123, lr_0 = 7.9700e-04
Loss = 1.4960e-01, PNorm = 49.5906, GNorm = 2.0892, lr_0 = 7.9645e-04
Loss = 1.4379e-01, PNorm = 49.6116, GNorm = 1.1214, lr_0 = 7.9591e-04
Loss = 1.6402e-01, PNorm = 49.6209, GNorm = 2.6808, lr_0 = 7.9536e-04
Loss = 1.5611e-01, PNorm = 49.6374, GNorm = 0.7505, lr_0 = 7.9482e-04
Loss = 1.4072e-01, PNorm = 49.6587, GNorm = 1.1688, lr_0 = 7.9427e-04
Loss = 1.6673e-01, PNorm = 49.6850, GNorm = 1.5577, lr_0 = 7.9373e-04
Loss = 1.4975e-01, PNorm = 49.6982, GNorm = 0.7928, lr_0 = 7.9319e-04
Loss = 1.3120e-01, PNorm = 49.7114, GNorm = 0.9052, lr_0 = 7.9264e-04
Loss = 1.4080e-01, PNorm = 49.7272, GNorm = 1.1291, lr_0 = 7.9210e-04
Loss = 1.3800e-01, PNorm = 49.7403, GNorm = 1.9356, lr_0 = 7.9156e-04
Loss = 1.5319e-01, PNorm = 49.7540, GNorm = 0.6666, lr_0 = 7.9101e-04
Loss = 1.3215e-01, PNorm = 49.7788, GNorm = 0.9863, lr_0 = 7.9047e-04
Loss = 1.5549e-01, PNorm = 49.8078, GNorm = 0.5001, lr_0 = 7.8993e-04
Loss = 1.3847e-01, PNorm = 49.8241, GNorm = 0.6888, lr_0 = 7.8939e-04
Loss = 1.3425e-01, PNorm = 49.8335, GNorm = 1.2863, lr_0 = 7.8885e-04
Loss = 1.6078e-01, PNorm = 49.8524, GNorm = 2.5096, lr_0 = 7.8831e-04
Loss = 1.3883e-01, PNorm = 49.8661, GNorm = 1.6023, lr_0 = 7.8777e-04
Loss = 1.5482e-01, PNorm = 49.8917, GNorm = 1.3173, lr_0 = 7.8723e-04
Loss = 1.4806e-01, PNorm = 49.9186, GNorm = 1.9137, lr_0 = 7.8669e-04
Loss = 1.5584e-01, PNorm = 49.9499, GNorm = 0.8816, lr_0 = 7.8615e-04
Loss = 1.4953e-01, PNorm = 49.9643, GNorm = 0.6550, lr_0 = 7.8561e-04
Loss = 1.3658e-01, PNorm = 49.9704, GNorm = 1.8427, lr_0 = 7.8507e-04
Loss = 1.4276e-01, PNorm = 49.9806, GNorm = 0.8129, lr_0 = 7.8454e-04
Loss = 1.2490e-01, PNorm = 49.9948, GNorm = 0.6145, lr_0 = 7.8400e-04
Loss = 1.5039e-01, PNorm = 50.0109, GNorm = 0.6776, lr_0 = 7.8346e-04
Loss = 1.4561e-01, PNorm = 50.0283, GNorm = 1.0453, lr_0 = 7.8293e-04
Loss = 1.5860e-01, PNorm = 50.0485, GNorm = 1.8981, lr_0 = 7.8239e-04
Loss = 1.3903e-01, PNorm = 50.0723, GNorm = 1.5381, lr_0 = 7.8185e-04
Loss = 1.5502e-01, PNorm = 50.0900, GNorm = 0.9126, lr_0 = 7.8132e-04
Validation mae = 0.467801
Epoch 5
Loss = 1.5065e-01, PNorm = 50.1167, GNorm = 1.6499, lr_0 = 7.8078e-04
Loss = 1.2613e-01, PNorm = 50.1387, GNorm = 0.8463, lr_0 = 7.8025e-04
Loss = 1.3738e-01, PNorm = 50.1546, GNorm = 1.8363, lr_0 = 7.7971e-04
Loss = 1.3286e-01, PNorm = 50.1731, GNorm = 1.7415, lr_0 = 7.7918e-04
Loss = 1.2918e-01, PNorm = 50.1959, GNorm = 1.4712, lr_0 = 7.7864e-04
Loss = 1.2833e-01, PNorm = 50.2154, GNorm = 0.9204, lr_0 = 7.7811e-04
Loss = 1.4798e-01, PNorm = 50.2296, GNorm = 1.4820, lr_0 = 7.7758e-04
Loss = 1.4587e-01, PNorm = 50.2551, GNorm = 0.6858, lr_0 = 7.7705e-04
Loss = 1.3605e-01, PNorm = 50.2818, GNorm = 1.7042, lr_0 = 7.7651e-04
Loss = 1.4911e-01, PNorm = 50.3032, GNorm = 0.8269, lr_0 = 7.7598e-04
Loss = 1.5651e-01, PNorm = 50.3212, GNorm = 2.1619, lr_0 = 7.7545e-04
Loss = 1.5405e-01, PNorm = 50.3384, GNorm = 0.8713, lr_0 = 7.7492e-04
Loss = 1.4590e-01, PNorm = 50.3643, GNorm = 2.4423, lr_0 = 7.7439e-04
Loss = 1.5546e-01, PNorm = 50.3856, GNorm = 2.2749, lr_0 = 7.7386e-04
Loss = 1.3806e-01, PNorm = 50.4088, GNorm = 1.7269, lr_0 = 7.7333e-04
Loss = 1.3064e-01, PNorm = 50.4299, GNorm = 0.7677, lr_0 = 7.7280e-04
Loss = 1.3535e-01, PNorm = 50.4487, GNorm = 1.5091, lr_0 = 7.7227e-04
Loss = 1.4938e-01, PNorm = 50.4672, GNorm = 0.7461, lr_0 = 7.7174e-04
Loss = 1.2525e-01, PNorm = 50.4889, GNorm = 0.5435, lr_0 = 7.7121e-04
Loss = 1.3277e-01, PNorm = 50.5058, GNorm = 0.8256, lr_0 = 7.7068e-04
Loss = 1.3455e-01, PNorm = 50.5165, GNorm = 0.5474, lr_0 = 7.7015e-04
Loss = 1.2232e-01, PNorm = 50.5357, GNorm = 1.4796, lr_0 = 7.6963e-04
Loss = 1.1261e-01, PNorm = 50.5559, GNorm = 0.5053, lr_0 = 7.6910e-04
Loss = 1.4947e-01, PNorm = 50.5711, GNorm = 0.7021, lr_0 = 7.6857e-04
Loss = 1.2428e-01, PNorm = 50.5828, GNorm = 1.3333, lr_0 = 7.6805e-04
Loss = 1.3917e-01, PNorm = 50.6029, GNorm = 1.2357, lr_0 = 7.6752e-04
Loss = 1.3388e-01, PNorm = 50.6189, GNorm = 0.6760, lr_0 = 7.6699e-04
Loss = 1.2709e-01, PNorm = 50.6418, GNorm = 0.7249, lr_0 = 7.6647e-04
Loss = 1.2812e-01, PNorm = 50.6639, GNorm = 1.2588, lr_0 = 7.6594e-04
Loss = 1.5220e-01, PNorm = 50.6810, GNorm = 3.2572, lr_0 = 7.6542e-04
Loss = 1.4195e-01, PNorm = 50.6992, GNorm = 1.3586, lr_0 = 7.6489e-04
Loss = 1.4180e-01, PNorm = 50.7138, GNorm = 0.5957, lr_0 = 7.6437e-04
Loss = 1.3989e-01, PNorm = 50.7356, GNorm = 0.8442, lr_0 = 7.6385e-04
Loss = 1.1513e-01, PNorm = 50.7577, GNorm = 1.2079, lr_0 = 7.6332e-04
Loss = 1.3724e-01, PNorm = 50.7770, GNorm = 0.9682, lr_0 = 7.6280e-04
Loss = 1.2904e-01, PNorm = 50.7949, GNorm = 1.1713, lr_0 = 7.6228e-04
Loss = 1.5845e-01, PNorm = 50.8104, GNorm = 1.9196, lr_0 = 7.6176e-04
Loss = 1.4521e-01, PNorm = 50.8323, GNorm = 2.6821, lr_0 = 7.6123e-04
Loss = 1.6052e-01, PNorm = 50.8533, GNorm = 1.0769, lr_0 = 7.6071e-04
Loss = 1.5707e-01, PNorm = 50.8779, GNorm = 1.6441, lr_0 = 7.6019e-04
Loss = 1.4027e-01, PNorm = 50.9045, GNorm = 1.8232, lr_0 = 7.5967e-04
Loss = 1.3448e-01, PNorm = 50.9257, GNorm = 1.9834, lr_0 = 7.5915e-04
Loss = 1.5025e-01, PNorm = 50.9350, GNorm = 0.8444, lr_0 = 7.5863e-04
Loss = 1.3382e-01, PNorm = 50.9532, GNorm = 1.6669, lr_0 = 7.5811e-04
Loss = 1.5716e-01, PNorm = 50.9803, GNorm = 1.2850, lr_0 = 7.5759e-04
Loss = 1.5246e-01, PNorm = 51.0092, GNorm = 1.5776, lr_0 = 7.5707e-04
Loss = 1.4374e-01, PNorm = 51.0306, GNorm = 0.7503, lr_0 = 7.5655e-04
Loss = 1.3266e-01, PNorm = 51.0479, GNorm = 0.8209, lr_0 = 7.5603e-04
Loss = 1.3856e-01, PNorm = 51.0643, GNorm = 1.1169, lr_0 = 7.5552e-04
Loss = 1.4389e-01, PNorm = 51.0878, GNorm = 0.6211, lr_0 = 7.5500e-04
Loss = 1.1878e-01, PNorm = 51.1072, GNorm = 1.0049, lr_0 = 7.5448e-04
Loss = 1.4498e-01, PNorm = 51.1232, GNorm = 1.6258, lr_0 = 7.5397e-04
Loss = 1.3233e-01, PNorm = 51.1430, GNorm = 0.7685, lr_0 = 7.5345e-04
Loss = 1.3884e-01, PNorm = 51.1635, GNorm = 0.6828, lr_0 = 7.5293e-04
Loss = 1.4267e-01, PNorm = 51.1791, GNorm = 0.6578, lr_0 = 7.5242e-04
Loss = 1.3866e-01, PNorm = 51.1881, GNorm = 0.8229, lr_0 = 7.5190e-04
Loss = 1.2639e-01, PNorm = 51.2014, GNorm = 0.8773, lr_0 = 7.5139e-04
Loss = 1.3680e-01, PNorm = 51.2193, GNorm = 0.8886, lr_0 = 7.5087e-04
Loss = 1.3402e-01, PNorm = 51.2374, GNorm = 0.7997, lr_0 = 7.5036e-04
Loss = 1.2713e-01, PNorm = 51.2592, GNorm = 1.8156, lr_0 = 7.4984e-04
Loss = 1.4679e-01, PNorm = 51.2728, GNorm = 1.0942, lr_0 = 7.4933e-04
Loss = 1.2830e-01, PNorm = 51.2951, GNorm = 1.4868, lr_0 = 7.4882e-04
Loss = 1.2096e-01, PNorm = 51.3183, GNorm = 1.1825, lr_0 = 7.4830e-04
Loss = 1.5711e-01, PNorm = 51.3356, GNorm = 2.3463, lr_0 = 7.4779e-04
Loss = 1.4023e-01, PNorm = 51.3495, GNorm = 1.1609, lr_0 = 7.4728e-04
Loss = 1.2120e-01, PNorm = 51.3694, GNorm = 0.8380, lr_0 = 7.4677e-04
Loss = 1.2847e-01, PNorm = 51.3899, GNorm = 0.5553, lr_0 = 7.4625e-04
Loss = 1.4479e-01, PNorm = 51.4056, GNorm = 1.9947, lr_0 = 7.4574e-04
Loss = 1.4904e-01, PNorm = 51.4298, GNorm = 1.3684, lr_0 = 7.4523e-04
Loss = 1.3926e-01, PNorm = 51.4492, GNorm = 0.6480, lr_0 = 7.4472e-04
Loss = 1.3365e-01, PNorm = 51.4618, GNorm = 1.6224, lr_0 = 7.4421e-04
Loss = 1.4798e-01, PNorm = 51.4786, GNorm = 0.9947, lr_0 = 7.4370e-04
Loss = 1.2848e-01, PNorm = 51.4948, GNorm = 0.7225, lr_0 = 7.4319e-04
Loss = 1.3770e-01, PNorm = 51.5086, GNorm = 0.6633, lr_0 = 7.4268e-04
Loss = 1.5397e-01, PNorm = 51.5206, GNorm = 3.7260, lr_0 = 7.4217e-04
Loss = 1.5186e-01, PNorm = 51.5480, GNorm = 0.7471, lr_0 = 7.4167e-04
Loss = 1.3433e-01, PNorm = 51.5702, GNorm = 0.9727, lr_0 = 7.4116e-04
Loss = 1.5616e-01, PNorm = 51.5904, GNorm = 3.1911, lr_0 = 7.4065e-04
Loss = 1.4157e-01, PNorm = 51.6149, GNorm = 0.7611, lr_0 = 7.4014e-04
Loss = 1.5156e-01, PNorm = 51.6387, GNorm = 1.1752, lr_0 = 7.3964e-04
Loss = 1.3205e-01, PNorm = 51.6513, GNorm = 0.8417, lr_0 = 7.3913e-04
Loss = 1.5673e-01, PNorm = 51.6631, GNorm = 1.0444, lr_0 = 7.3862e-04
Loss = 1.5703e-01, PNorm = 51.6853, GNorm = 1.5836, lr_0 = 7.3812e-04
Loss = 1.4659e-01, PNorm = 51.7119, GNorm = 1.9792, lr_0 = 7.3761e-04
Loss = 1.3951e-01, PNorm = 51.7369, GNorm = 2.2171, lr_0 = 7.3711e-04
Loss = 1.6426e-01, PNorm = 51.7566, GNorm = 2.6314, lr_0 = 7.3660e-04
Loss = 1.4425e-01, PNorm = 51.7747, GNorm = 1.9973, lr_0 = 7.3610e-04
Loss = 1.4384e-01, PNorm = 51.7985, GNorm = 0.4200, lr_0 = 7.3559e-04
Loss = 1.3210e-01, PNorm = 51.8198, GNorm = 0.7993, lr_0 = 7.3509e-04
Loss = 1.2901e-01, PNorm = 51.8382, GNorm = 0.6601, lr_0 = 7.3458e-04
Loss = 1.3583e-01, PNorm = 51.8538, GNorm = 1.1517, lr_0 = 7.3408e-04
Loss = 1.2430e-01, PNorm = 51.8706, GNorm = 1.6050, lr_0 = 7.3358e-04
Loss = 1.2724e-01, PNorm = 51.8865, GNorm = 1.4476, lr_0 = 7.3308e-04
Loss = 1.2535e-01, PNorm = 51.9007, GNorm = 0.6635, lr_0 = 7.3257e-04
Loss = 1.4208e-01, PNorm = 51.9135, GNorm = 2.2782, lr_0 = 7.3207e-04
Loss = 1.5644e-01, PNorm = 51.9293, GNorm = 1.0636, lr_0 = 7.3157e-04
Loss = 1.4154e-01, PNorm = 51.9566, GNorm = 0.6230, lr_0 = 7.3107e-04
Loss = 1.4181e-01, PNorm = 51.9801, GNorm = 0.6457, lr_0 = 7.3057e-04
Loss = 1.3121e-01, PNorm = 51.9984, GNorm = 1.1669, lr_0 = 7.3007e-04
Loss = 1.3859e-01, PNorm = 52.0119, GNorm = 0.8756, lr_0 = 7.2957e-04
Loss = 1.2376e-01, PNorm = 52.0256, GNorm = 0.7868, lr_0 = 7.2907e-04
Loss = 1.5104e-01, PNorm = 52.0362, GNorm = 1.0954, lr_0 = 7.2857e-04
Loss = 1.2072e-01, PNorm = 52.0449, GNorm = 0.7363, lr_0 = 7.2807e-04
Loss = 1.2356e-01, PNorm = 52.0608, GNorm = 0.7364, lr_0 = 7.2757e-04
Loss = 1.4285e-01, PNorm = 52.0759, GNorm = 0.7621, lr_0 = 7.2707e-04
Loss = 1.3138e-01, PNorm = 52.0932, GNorm = 1.0454, lr_0 = 7.2657e-04
Loss = 1.3359e-01, PNorm = 52.1140, GNorm = 0.7465, lr_0 = 7.2608e-04
Loss = 1.1517e-01, PNorm = 52.1315, GNorm = 1.0995, lr_0 = 7.2558e-04
Loss = 1.4604e-01, PNorm = 52.1486, GNorm = 1.3963, lr_0 = 7.2508e-04
Loss = 1.6037e-01, PNorm = 52.1645, GNorm = 1.6211, lr_0 = 7.2458e-04
Loss = 1.3991e-01, PNorm = 52.1790, GNorm = 1.0101, lr_0 = 7.2409e-04
Loss = 1.3928e-01, PNorm = 52.1903, GNorm = 0.7746, lr_0 = 7.2359e-04
Loss = 1.2490e-01, PNorm = 52.2113, GNorm = 0.7770, lr_0 = 7.2310e-04
Loss = 1.4801e-01, PNorm = 52.2221, GNorm = 1.2159, lr_0 = 7.2260e-04
Loss = 1.2587e-01, PNorm = 52.2323, GNorm = 0.9231, lr_0 = 7.2211e-04
Loss = 1.4575e-01, PNorm = 52.2440, GNorm = 0.6682, lr_0 = 7.2161e-04
Loss = 1.3786e-01, PNorm = 52.2577, GNorm = 2.1803, lr_0 = 7.2112e-04
Loss = 1.3094e-01, PNorm = 52.2725, GNorm = 1.3074, lr_0 = 7.2062e-04
Loss = 1.3792e-01, PNorm = 52.2932, GNorm = 0.5746, lr_0 = 7.2013e-04
Loss = 1.5064e-01, PNorm = 52.3016, GNorm = 0.5236, lr_0 = 7.1964e-04
Validation mae = 0.425156
Epoch 6
Loss = 1.2633e-01, PNorm = 52.3126, GNorm = 1.7027, lr_0 = 7.1914e-04
Loss = 1.4114e-01, PNorm = 52.3329, GNorm = 1.4542, lr_0 = 7.1865e-04
Loss = 1.4174e-01, PNorm = 52.3588, GNorm = 0.6268, lr_0 = 7.1816e-04
Loss = 1.1050e-01, PNorm = 52.3792, GNorm = 0.5869, lr_0 = 7.1767e-04
Loss = 1.1826e-01, PNorm = 52.3946, GNorm = 0.8696, lr_0 = 7.1717e-04
Loss = 1.2138e-01, PNorm = 52.4134, GNorm = 0.6462, lr_0 = 7.1668e-04
Loss = 1.3287e-01, PNorm = 52.4311, GNorm = 0.6676, lr_0 = 7.1619e-04
Loss = 1.3670e-01, PNorm = 52.4479, GNorm = 0.9793, lr_0 = 7.1570e-04
Loss = 1.2555e-01, PNorm = 52.4620, GNorm = 1.0892, lr_0 = 7.1521e-04
Loss = 1.5131e-01, PNorm = 52.4801, GNorm = 2.1285, lr_0 = 7.1472e-04
Loss = 1.4038e-01, PNorm = 52.5095, GNorm = 1.3115, lr_0 = 7.1423e-04
Loss = 1.5616e-01, PNorm = 52.5401, GNorm = 1.2243, lr_0 = 7.1374e-04
Loss = 1.2051e-01, PNorm = 52.5679, GNorm = 1.1858, lr_0 = 7.1325e-04
Loss = 1.1745e-01, PNorm = 52.5818, GNorm = 0.6671, lr_0 = 7.1277e-04
Loss = 1.2441e-01, PNorm = 52.6040, GNorm = 1.2203, lr_0 = 7.1228e-04
Loss = 1.2591e-01, PNorm = 52.6273, GNorm = 0.5642, lr_0 = 7.1179e-04
Loss = 1.1550e-01, PNorm = 52.6489, GNorm = 0.6019, lr_0 = 7.1130e-04
Loss = 1.1321e-01, PNorm = 52.6635, GNorm = 0.7319, lr_0 = 7.1081e-04
Loss = 1.4207e-01, PNorm = 52.6744, GNorm = 1.0353, lr_0 = 7.1033e-04
Loss = 1.1619e-01, PNorm = 52.6920, GNorm = 1.5100, lr_0 = 7.0984e-04
Loss = 1.2137e-01, PNorm = 52.7064, GNorm = 1.1480, lr_0 = 7.0935e-04
Loss = 1.3430e-01, PNorm = 52.7288, GNorm = 0.6138, lr_0 = 7.0887e-04
Loss = 1.2118e-01, PNorm = 52.7464, GNorm = 0.6094, lr_0 = 7.0838e-04
Loss = 1.1586e-01, PNorm = 52.7599, GNorm = 0.7438, lr_0 = 7.0790e-04
Loss = 1.2812e-01, PNorm = 52.7735, GNorm = 1.4744, lr_0 = 7.0741e-04
Loss = 1.5403e-01, PNorm = 52.7791, GNorm = 2.0879, lr_0 = 7.0693e-04
Loss = 1.3760e-01, PNorm = 52.8029, GNorm = 1.7782, lr_0 = 7.0644e-04
Loss = 1.4141e-01, PNorm = 52.8191, GNorm = 0.6513, lr_0 = 7.0596e-04
Loss = 1.3904e-01, PNorm = 52.8344, GNorm = 0.8247, lr_0 = 7.0548e-04
Loss = 1.2085e-01, PNorm = 52.8541, GNorm = 1.3015, lr_0 = 7.0499e-04
Loss = 1.4566e-01, PNorm = 52.8714, GNorm = 1.3427, lr_0 = 7.0451e-04
Loss = 1.3995e-01, PNorm = 52.8912, GNorm = 0.7703, lr_0 = 7.0403e-04
Loss = 1.2655e-01, PNorm = 52.9090, GNorm = 3.1990, lr_0 = 7.0354e-04
Loss = 1.5346e-01, PNorm = 52.9301, GNorm = 1.0870, lr_0 = 7.0306e-04
Loss = 1.2502e-01, PNorm = 52.9422, GNorm = 0.7350, lr_0 = 7.0258e-04
Loss = 1.3642e-01, PNorm = 52.9631, GNorm = 1.0803, lr_0 = 7.0210e-04
Loss = 1.1557e-01, PNorm = 52.9826, GNorm = 0.7865, lr_0 = 7.0162e-04
Loss = 1.3003e-01, PNorm = 52.9978, GNorm = 1.1700, lr_0 = 7.0114e-04
Loss = 1.2556e-01, PNorm = 53.0122, GNorm = 0.9452, lr_0 = 7.0066e-04
Loss = 1.2245e-01, PNorm = 53.0278, GNorm = 0.6556, lr_0 = 7.0018e-04
Loss = 1.2881e-01, PNorm = 53.0455, GNorm = 0.8337, lr_0 = 6.9970e-04
Loss = 1.2042e-01, PNorm = 53.0620, GNorm = 1.1150, lr_0 = 6.9922e-04
Loss = 1.1059e-01, PNorm = 53.0790, GNorm = 1.0410, lr_0 = 6.9874e-04
Loss = 1.2059e-01, PNorm = 53.0983, GNorm = 0.9210, lr_0 = 6.9826e-04
Loss = 1.2720e-01, PNorm = 53.1192, GNorm = 1.3362, lr_0 = 6.9778e-04
Loss = 1.3460e-01, PNorm = 53.1360, GNorm = 0.7550, lr_0 = 6.9730e-04
Loss = 1.2178e-01, PNorm = 53.1590, GNorm = 0.8221, lr_0 = 6.9683e-04
Loss = 1.1728e-01, PNorm = 53.1775, GNorm = 0.7544, lr_0 = 6.9635e-04
Loss = 1.2106e-01, PNorm = 53.1929, GNorm = 0.6258, lr_0 = 6.9587e-04
Loss = 1.2669e-01, PNorm = 53.2074, GNorm = 1.5754, lr_0 = 6.9540e-04
Loss = 1.3820e-01, PNorm = 53.2215, GNorm = 1.3911, lr_0 = 6.9492e-04
Loss = 1.2250e-01, PNorm = 53.2363, GNorm = 1.2054, lr_0 = 6.9444e-04
Loss = 1.3847e-01, PNorm = 53.2556, GNorm = 1.1688, lr_0 = 6.9397e-04
Loss = 1.4527e-01, PNorm = 53.2783, GNorm = 0.6899, lr_0 = 6.9349e-04
Loss = 1.4390e-01, PNorm = 53.2948, GNorm = 2.8518, lr_0 = 6.9302e-04
Loss = 1.3703e-01, PNorm = 53.3164, GNorm = 0.8064, lr_0 = 6.9254e-04
Loss = 1.4138e-01, PNorm = 53.3415, GNorm = 0.9657, lr_0 = 6.9207e-04
Loss = 1.3185e-01, PNorm = 53.3580, GNorm = 1.1058, lr_0 = 6.9159e-04
Loss = 1.4745e-01, PNorm = 53.3760, GNorm = 1.4526, lr_0 = 6.9112e-04
Loss = 1.4657e-01, PNorm = 53.3983, GNorm = 1.4220, lr_0 = 6.9065e-04
Loss = 1.3946e-01, PNorm = 53.4268, GNorm = 1.4762, lr_0 = 6.9017e-04
Loss = 1.1920e-01, PNorm = 53.4578, GNorm = 1.0221, lr_0 = 6.8970e-04
Loss = 1.2209e-01, PNorm = 53.4897, GNorm = 0.9781, lr_0 = 6.8923e-04
Loss = 1.3997e-01, PNorm = 53.5086, GNorm = 0.8213, lr_0 = 6.8876e-04
Loss = 1.2629e-01, PNorm = 53.5245, GNorm = 0.5035, lr_0 = 6.8828e-04
Loss = 1.3213e-01, PNorm = 53.5460, GNorm = 0.6552, lr_0 = 6.8781e-04
Loss = 1.2355e-01, PNorm = 53.5622, GNorm = 1.8036, lr_0 = 6.8734e-04
Loss = 1.1319e-01, PNorm = 53.5752, GNorm = 1.0945, lr_0 = 6.8687e-04
Loss = 1.2510e-01, PNorm = 53.5908, GNorm = 0.7385, lr_0 = 6.8640e-04
Loss = 1.2483e-01, PNorm = 53.6023, GNorm = 0.7713, lr_0 = 6.8593e-04
Loss = 1.3832e-01, PNorm = 53.6135, GNorm = 1.9522, lr_0 = 6.8546e-04
Loss = 1.4726e-01, PNorm = 53.6310, GNorm = 1.7578, lr_0 = 6.8499e-04
Loss = 1.3141e-01, PNorm = 53.6537, GNorm = 0.7727, lr_0 = 6.8452e-04
Loss = 1.1473e-01, PNorm = 53.6762, GNorm = 0.6091, lr_0 = 6.8405e-04
Loss = 1.3546e-01, PNorm = 53.7013, GNorm = 0.6502, lr_0 = 6.8358e-04
Loss = 1.1866e-01, PNorm = 53.7169, GNorm = 0.8157, lr_0 = 6.8312e-04
Loss = 1.4762e-01, PNorm = 53.7257, GNorm = 0.6718, lr_0 = 6.8265e-04
Loss = 1.1497e-01, PNorm = 53.7334, GNorm = 0.7368, lr_0 = 6.8218e-04
Loss = 1.3746e-01, PNorm = 53.7483, GNorm = 0.5304, lr_0 = 6.8171e-04
Loss = 1.4348e-01, PNorm = 53.7685, GNorm = 0.7109, lr_0 = 6.8125e-04
Loss = 1.2299e-01, PNorm = 53.7915, GNorm = 0.5965, lr_0 = 6.8078e-04
Loss = 1.2543e-01, PNorm = 53.8090, GNorm = 0.7793, lr_0 = 6.8031e-04
Loss = 9.8563e-02, PNorm = 53.8263, GNorm = 0.8713, lr_0 = 6.7985e-04
Loss = 1.0555e-01, PNorm = 53.8389, GNorm = 0.6574, lr_0 = 6.7938e-04
Loss = 1.1187e-01, PNorm = 53.8528, GNorm = 0.9115, lr_0 = 6.7892e-04
Loss = 1.3718e-01, PNorm = 53.8678, GNorm = 0.7015, lr_0 = 6.7845e-04
Loss = 1.2783e-01, PNorm = 53.8804, GNorm = 1.0712, lr_0 = 6.7799e-04
Loss = 1.3253e-01, PNorm = 53.8934, GNorm = 0.9404, lr_0 = 6.7752e-04
Loss = 1.2372e-01, PNorm = 53.9082, GNorm = 0.8302, lr_0 = 6.7706e-04
Loss = 1.5004e-01, PNorm = 53.9272, GNorm = 0.6638, lr_0 = 6.7659e-04
Loss = 1.1300e-01, PNorm = 53.9438, GNorm = 0.7426, lr_0 = 6.7613e-04
Loss = 1.0379e-01, PNorm = 53.9574, GNorm = 0.5617, lr_0 = 6.7567e-04
Loss = 1.1154e-01, PNorm = 53.9678, GNorm = 0.7020, lr_0 = 6.7520e-04
Loss = 1.2208e-01, PNorm = 53.9796, GNorm = 0.7619, lr_0 = 6.7474e-04
Loss = 1.2413e-01, PNorm = 53.9967, GNorm = 0.6667, lr_0 = 6.7428e-04
Loss = 1.2329e-01, PNorm = 54.0141, GNorm = 0.9651, lr_0 = 6.7382e-04
Loss = 1.4163e-01, PNorm = 54.0348, GNorm = 1.0068, lr_0 = 6.7335e-04
Loss = 1.2326e-01, PNorm = 54.0562, GNorm = 0.6078, lr_0 = 6.7289e-04
Loss = 1.2926e-01, PNorm = 54.0719, GNorm = 1.4142, lr_0 = 6.7243e-04
Loss = 1.3877e-01, PNorm = 54.0914, GNorm = 0.7658, lr_0 = 6.7197e-04
Loss = 1.4467e-01, PNorm = 54.1143, GNorm = 1.3404, lr_0 = 6.7151e-04
Loss = 1.1849e-01, PNorm = 54.1331, GNorm = 0.6570, lr_0 = 6.7105e-04
Loss = 1.2580e-01, PNorm = 54.1470, GNorm = 0.7856, lr_0 = 6.7059e-04
Loss = 1.1565e-01, PNorm = 54.1603, GNorm = 1.4659, lr_0 = 6.7013e-04
Loss = 1.3073e-01, PNorm = 54.1734, GNorm = 0.5406, lr_0 = 6.6967e-04
Loss = 1.1433e-01, PNorm = 54.1858, GNorm = 0.6497, lr_0 = 6.6921e-04
Loss = 1.1972e-01, PNorm = 54.1978, GNorm = 1.0792, lr_0 = 6.6876e-04
Loss = 1.3335e-01, PNorm = 54.2097, GNorm = 0.8130, lr_0 = 6.6830e-04
Loss = 1.1928e-01, PNorm = 54.2275, GNorm = 1.0269, lr_0 = 6.6784e-04
Loss = 1.4243e-01, PNorm = 54.2405, GNorm = 0.3412, lr_0 = 6.6738e-04
Loss = 1.4338e-01, PNorm = 54.2579, GNorm = 0.7597, lr_0 = 6.6693e-04
Loss = 1.3167e-01, PNorm = 54.2760, GNorm = 0.6745, lr_0 = 6.6647e-04
Loss = 1.3056e-01, PNorm = 54.2896, GNorm = 0.6249, lr_0 = 6.6601e-04
Loss = 1.5046e-01, PNorm = 54.3009, GNorm = 2.6128, lr_0 = 6.6556e-04
Loss = 1.4929e-01, PNorm = 54.3258, GNorm = 2.1507, lr_0 = 6.6510e-04
Loss = 1.6589e-01, PNorm = 54.3538, GNorm = 1.0093, lr_0 = 6.6464e-04
Loss = 1.5829e-01, PNorm = 54.3751, GNorm = 0.8981, lr_0 = 6.6419e-04
Loss = 1.2845e-01, PNorm = 54.3998, GNorm = 0.7733, lr_0 = 6.6373e-04
Loss = 1.4795e-01, PNorm = 54.4222, GNorm = 1.3038, lr_0 = 6.6328e-04
Loss = 1.4599e-01, PNorm = 54.4380, GNorm = 1.3173, lr_0 = 6.6282e-04
Validation mae = 0.463242
Epoch 7
Loss = 1.4008e-01, PNorm = 54.4619, GNorm = 1.6907, lr_0 = 6.6237e-04
Loss = 1.2587e-01, PNorm = 54.4846, GNorm = 1.7976, lr_0 = 6.6192e-04
Loss = 1.4290e-01, PNorm = 54.5037, GNorm = 1.8766, lr_0 = 6.6146e-04
Loss = 1.1748e-01, PNorm = 54.5253, GNorm = 0.7008, lr_0 = 6.6101e-04
Loss = 1.2189e-01, PNorm = 54.5442, GNorm = 0.5280, lr_0 = 6.6056e-04
Loss = 1.2753e-01, PNorm = 54.5605, GNorm = 1.2822, lr_0 = 6.6011e-04
Loss = 1.3719e-01, PNorm = 54.5772, GNorm = 0.7623, lr_0 = 6.5965e-04
Loss = 1.2951e-01, PNorm = 54.6003, GNorm = 0.6807, lr_0 = 6.5920e-04
Loss = 1.2351e-01, PNorm = 54.6208, GNorm = 1.0056, lr_0 = 6.5875e-04
Loss = 1.1816e-01, PNorm = 54.6425, GNorm = 0.7230, lr_0 = 6.5830e-04
Loss = 1.4692e-01, PNorm = 54.6642, GNorm = 0.8976, lr_0 = 6.5785e-04
Loss = 1.3086e-01, PNorm = 54.6802, GNorm = 0.7664, lr_0 = 6.5740e-04
Loss = 1.1323e-01, PNorm = 54.6985, GNorm = 1.1044, lr_0 = 6.5695e-04
Loss = 1.1685e-01, PNorm = 54.7196, GNorm = 0.6394, lr_0 = 6.5650e-04
Loss = 1.1963e-01, PNorm = 54.7391, GNorm = 1.0896, lr_0 = 6.5605e-04
Loss = 1.1239e-01, PNorm = 54.7543, GNorm = 1.3181, lr_0 = 6.5560e-04
Loss = 1.0129e-01, PNorm = 54.7703, GNorm = 1.5781, lr_0 = 6.5515e-04
Loss = 1.2231e-01, PNorm = 54.7830, GNorm = 1.4155, lr_0 = 6.5470e-04
Loss = 1.2186e-01, PNorm = 54.7918, GNorm = 0.8922, lr_0 = 6.5425e-04
Loss = 1.2820e-01, PNorm = 54.8120, GNorm = 0.7870, lr_0 = 6.5380e-04
Loss = 1.1077e-01, PNorm = 54.8328, GNorm = 1.3314, lr_0 = 6.5335e-04
Loss = 1.1866e-01, PNorm = 54.8495, GNorm = 1.4051, lr_0 = 6.5291e-04
Loss = 1.2994e-01, PNorm = 54.8695, GNorm = 1.0461, lr_0 = 6.5246e-04
Loss = 1.2044e-01, PNorm = 54.8889, GNorm = 1.0165, lr_0 = 6.5201e-04
Loss = 1.0231e-01, PNorm = 54.9035, GNorm = 0.4887, lr_0 = 6.5157e-04
Loss = 1.1750e-01, PNorm = 54.9226, GNorm = 0.8954, lr_0 = 6.5112e-04
Loss = 1.2112e-01, PNorm = 54.9391, GNorm = 1.3585, lr_0 = 6.5067e-04
Loss = 1.1383e-01, PNorm = 54.9559, GNorm = 0.7003, lr_0 = 6.5023e-04
Loss = 1.3313e-01, PNorm = 54.9777, GNorm = 1.1421, lr_0 = 6.4978e-04
Loss = 1.0903e-01, PNorm = 54.9957, GNorm = 0.7523, lr_0 = 6.4934e-04
Loss = 1.1816e-01, PNorm = 55.0082, GNorm = 1.9946, lr_0 = 6.4889e-04
Loss = 1.0748e-01, PNorm = 55.0202, GNorm = 0.5700, lr_0 = 6.4845e-04
Loss = 1.2987e-01, PNorm = 55.0321, GNorm = 1.2242, lr_0 = 6.4800e-04
Loss = 1.0986e-01, PNorm = 55.0455, GNorm = 1.0594, lr_0 = 6.4756e-04
Loss = 1.2763e-01, PNorm = 55.0573, GNorm = 1.1737, lr_0 = 6.4712e-04
Loss = 1.2690e-01, PNorm = 55.0734, GNorm = 0.6688, lr_0 = 6.4667e-04
Loss = 9.7773e-02, PNorm = 55.0902, GNorm = 0.6493, lr_0 = 6.4623e-04
Loss = 1.0346e-01, PNorm = 55.1037, GNorm = 0.6069, lr_0 = 6.4579e-04
Loss = 1.2154e-01, PNorm = 55.1193, GNorm = 1.3877, lr_0 = 6.4534e-04
Loss = 9.6456e-02, PNorm = 55.1355, GNorm = 1.0197, lr_0 = 6.4490e-04
Loss = 1.1097e-01, PNorm = 55.1462, GNorm = 1.0681, lr_0 = 6.4446e-04
Loss = 1.2712e-01, PNorm = 55.1615, GNorm = 2.0245, lr_0 = 6.4402e-04
Loss = 1.2764e-01, PNorm = 55.1759, GNorm = 1.2247, lr_0 = 6.4358e-04
Loss = 1.3798e-01, PNorm = 55.2015, GNorm = 1.3104, lr_0 = 6.4314e-04
Loss = 1.3155e-01, PNorm = 55.2249, GNorm = 0.6219, lr_0 = 6.4270e-04
Loss = 1.2250e-01, PNorm = 55.2416, GNorm = 0.7347, lr_0 = 6.4226e-04
Loss = 1.1292e-01, PNorm = 55.2575, GNorm = 0.9416, lr_0 = 6.4182e-04
Loss = 1.1802e-01, PNorm = 55.2739, GNorm = 0.7066, lr_0 = 6.4138e-04
Loss = 1.2276e-01, PNorm = 55.2910, GNorm = 0.9471, lr_0 = 6.4094e-04
Loss = 1.1730e-01, PNorm = 55.2993, GNorm = 0.8360, lr_0 = 6.4050e-04
Loss = 1.1829e-01, PNorm = 55.3106, GNorm = 0.8213, lr_0 = 6.4006e-04
Loss = 1.1086e-01, PNorm = 55.3294, GNorm = 1.1639, lr_0 = 6.3962e-04
Loss = 1.0695e-01, PNorm = 55.3482, GNorm = 0.9230, lr_0 = 6.3918e-04
Loss = 1.3455e-01, PNorm = 55.3626, GNorm = 1.3455, lr_0 = 6.3874e-04
Loss = 1.1410e-01, PNorm = 55.3756, GNorm = 0.9421, lr_0 = 6.3831e-04
Loss = 1.1437e-01, PNorm = 55.3954, GNorm = 0.6595, lr_0 = 6.3787e-04
Loss = 1.1023e-01, PNorm = 55.4128, GNorm = 0.7118, lr_0 = 6.3743e-04
Loss = 1.2004e-01, PNorm = 55.4239, GNorm = 1.7245, lr_0 = 6.3700e-04
Loss = 1.1971e-01, PNorm = 55.4397, GNorm = 0.6617, lr_0 = 6.3656e-04
Loss = 1.4854e-01, PNorm = 55.4559, GNorm = 1.1927, lr_0 = 6.3612e-04
Loss = 1.1589e-01, PNorm = 55.4715, GNorm = 0.5499, lr_0 = 6.3569e-04
Loss = 1.2962e-01, PNorm = 55.4865, GNorm = 0.9167, lr_0 = 6.3525e-04
Loss = 1.0146e-01, PNorm = 55.5037, GNorm = 0.8167, lr_0 = 6.3482e-04
Loss = 9.8378e-02, PNorm = 55.5100, GNorm = 0.9385, lr_0 = 6.3438e-04
Loss = 1.0123e-01, PNorm = 55.5185, GNorm = 0.5280, lr_0 = 6.3395e-04
Loss = 1.1302e-01, PNorm = 55.5337, GNorm = 0.9074, lr_0 = 6.3351e-04
Loss = 1.2906e-01, PNorm = 55.5500, GNorm = 1.9017, lr_0 = 6.3308e-04
Loss = 1.2301e-01, PNorm = 55.5637, GNorm = 0.5260, lr_0 = 6.3265e-04
Loss = 1.2100e-01, PNorm = 55.5746, GNorm = 0.8775, lr_0 = 6.3221e-04
Loss = 1.1466e-01, PNorm = 55.5879, GNorm = 0.5958, lr_0 = 6.3178e-04
Loss = 1.2158e-01, PNorm = 55.6020, GNorm = 0.7199, lr_0 = 6.3135e-04
Loss = 1.1780e-01, PNorm = 55.6184, GNorm = 1.5740, lr_0 = 6.3091e-04
Loss = 1.2235e-01, PNorm = 55.6361, GNorm = 1.7347, lr_0 = 6.3048e-04
Loss = 1.2543e-01, PNorm = 55.6597, GNorm = 0.7563, lr_0 = 6.3005e-04
Loss = 1.1015e-01, PNorm = 55.6839, GNorm = 0.8873, lr_0 = 6.2962e-04
Loss = 1.1359e-01, PNorm = 55.6958, GNorm = 0.6426, lr_0 = 6.2919e-04
Loss = 1.1189e-01, PNorm = 55.7080, GNorm = 0.8345, lr_0 = 6.2876e-04
Loss = 1.0620e-01, PNorm = 55.7232, GNorm = 0.5579, lr_0 = 6.2833e-04
Loss = 1.2187e-01, PNorm = 55.7379, GNorm = 1.3135, lr_0 = 6.2789e-04
Loss = 1.0293e-01, PNorm = 55.7484, GNorm = 1.1442, lr_0 = 6.2746e-04
Loss = 1.1800e-01, PNorm = 55.7666, GNorm = 1.4145, lr_0 = 6.2703e-04
Loss = 1.1967e-01, PNorm = 55.7860, GNorm = 1.1843, lr_0 = 6.2661e-04
Loss = 1.1887e-01, PNorm = 55.8020, GNorm = 0.9266, lr_0 = 6.2618e-04
Loss = 1.3539e-01, PNorm = 55.8230, GNorm = 1.3866, lr_0 = 6.2575e-04
Loss = 1.1773e-01, PNorm = 55.8384, GNorm = 0.8842, lr_0 = 6.2532e-04
Loss = 1.1353e-01, PNorm = 55.8536, GNorm = 0.8715, lr_0 = 6.2489e-04
Loss = 1.2706e-01, PNorm = 55.8710, GNorm = 0.7807, lr_0 = 6.2446e-04
Loss = 1.1822e-01, PNorm = 55.8820, GNorm = 0.8323, lr_0 = 6.2403e-04
Loss = 1.1210e-01, PNorm = 55.8950, GNorm = 0.7551, lr_0 = 6.2361e-04
Loss = 1.1061e-01, PNorm = 55.9117, GNorm = 1.1237, lr_0 = 6.2318e-04
Loss = 1.2713e-01, PNorm = 55.9278, GNorm = 0.9359, lr_0 = 6.2275e-04
Loss = 1.1984e-01, PNorm = 55.9376, GNorm = 1.0004, lr_0 = 6.2233e-04
Loss = 1.0495e-01, PNorm = 55.9482, GNorm = 0.5256, lr_0 = 6.2190e-04
Loss = 1.1708e-01, PNorm = 55.9561, GNorm = 1.3122, lr_0 = 6.2147e-04
Loss = 1.0730e-01, PNorm = 55.9677, GNorm = 0.5463, lr_0 = 6.2105e-04
Loss = 1.1697e-01, PNorm = 55.9878, GNorm = 0.9137, lr_0 = 6.2062e-04
Loss = 1.2886e-01, PNorm = 56.0072, GNorm = 1.6998, lr_0 = 6.2020e-04
Loss = 1.1708e-01, PNorm = 56.0218, GNorm = 0.8878, lr_0 = 6.1977e-04
Loss = 1.2301e-01, PNorm = 56.0409, GNorm = 0.7871, lr_0 = 6.1935e-04
Loss = 1.0586e-01, PNorm = 56.0569, GNorm = 0.6253, lr_0 = 6.1892e-04
Loss = 1.1597e-01, PNorm = 56.0713, GNorm = 1.1785, lr_0 = 6.1850e-04
Loss = 1.4250e-01, PNorm = 56.0913, GNorm = 1.1020, lr_0 = 6.1808e-04
Loss = 1.2894e-01, PNorm = 56.1098, GNorm = 0.9773, lr_0 = 6.1765e-04
Loss = 1.1680e-01, PNorm = 56.1217, GNorm = 0.9668, lr_0 = 6.1723e-04
Loss = 1.0476e-01, PNorm = 56.1324, GNorm = 0.5386, lr_0 = 6.1681e-04
Loss = 1.2396e-01, PNorm = 56.1518, GNorm = 1.0791, lr_0 = 6.1638e-04
Loss = 1.1526e-01, PNorm = 56.1703, GNorm = 1.0846, lr_0 = 6.1596e-04
Loss = 1.0432e-01, PNorm = 56.1848, GNorm = 0.6229, lr_0 = 6.1554e-04
Loss = 1.2899e-01, PNorm = 56.1931, GNorm = 0.6795, lr_0 = 6.1512e-04
Loss = 1.1408e-01, PNorm = 56.2084, GNorm = 0.8469, lr_0 = 6.1470e-04
Loss = 1.3895e-01, PNorm = 56.2242, GNorm = 1.2889, lr_0 = 6.1428e-04
Loss = 1.2264e-01, PNorm = 56.2394, GNorm = 1.4421, lr_0 = 6.1385e-04
Loss = 1.1303e-01, PNorm = 56.2540, GNorm = 0.9200, lr_0 = 6.1343e-04
Loss = 1.0748e-01, PNorm = 56.2747, GNorm = 0.7105, lr_0 = 6.1301e-04
Loss = 1.0884e-01, PNorm = 56.2900, GNorm = 0.6048, lr_0 = 6.1259e-04
Loss = 1.1557e-01, PNorm = 56.2989, GNorm = 0.6060, lr_0 = 6.1217e-04
Loss = 1.0932e-01, PNorm = 56.3101, GNorm = 1.1109, lr_0 = 6.1175e-04
Loss = 1.2870e-01, PNorm = 56.3239, GNorm = 1.6143, lr_0 = 6.1134e-04
Loss = 1.2399e-01, PNorm = 56.3398, GNorm = 1.2656, lr_0 = 6.1092e-04
Loss = 1.4333e-01, PNorm = 56.3530, GNorm = 0.7889, lr_0 = 6.1050e-04
Validation mae = 0.423821
Epoch 8
Loss = 1.0883e-01, PNorm = 56.3711, GNorm = 0.8817, lr_0 = 6.1008e-04
Loss = 1.1730e-01, PNorm = 56.3868, GNorm = 1.7822, lr_0 = 6.0966e-04
Loss = 9.5874e-02, PNorm = 56.4056, GNorm = 0.7806, lr_0 = 6.0924e-04
Loss = 1.0316e-01, PNorm = 56.4180, GNorm = 0.8548, lr_0 = 6.0883e-04
Loss = 1.0396e-01, PNorm = 56.4350, GNorm = 0.7392, lr_0 = 6.0841e-04
Loss = 1.0711e-01, PNorm = 56.4516, GNorm = 0.7130, lr_0 = 6.0799e-04
Loss = 1.0737e-01, PNorm = 56.4674, GNorm = 0.6038, lr_0 = 6.0758e-04
Loss = 1.1288e-01, PNorm = 56.4801, GNorm = 1.0292, lr_0 = 6.0716e-04
Loss = 1.0956e-01, PNorm = 56.4902, GNorm = 1.4896, lr_0 = 6.0674e-04
Loss = 1.0761e-01, PNorm = 56.5039, GNorm = 0.7538, lr_0 = 6.0633e-04
Loss = 1.1329e-01, PNorm = 56.5229, GNorm = 1.4074, lr_0 = 6.0591e-04
Loss = 1.1123e-01, PNorm = 56.5386, GNorm = 0.9195, lr_0 = 6.0550e-04
Loss = 1.1431e-01, PNorm = 56.5551, GNorm = 1.4704, lr_0 = 6.0508e-04
Loss = 1.0392e-01, PNorm = 56.5678, GNorm = 1.2846, lr_0 = 6.0467e-04
Loss = 1.1269e-01, PNorm = 56.5835, GNorm = 0.6344, lr_0 = 6.0425e-04
Loss = 1.0541e-01, PNorm = 56.6033, GNorm = 0.6936, lr_0 = 6.0384e-04
Loss = 9.5749e-02, PNorm = 56.6194, GNorm = 0.5684, lr_0 = 6.0343e-04
Loss = 1.0214e-01, PNorm = 56.6310, GNorm = 1.1806, lr_0 = 6.0301e-04
Loss = 1.1120e-01, PNorm = 56.6403, GNorm = 0.7497, lr_0 = 6.0260e-04
Loss = 1.1133e-01, PNorm = 56.6554, GNorm = 0.8423, lr_0 = 6.0219e-04
Loss = 9.6906e-02, PNorm = 56.6685, GNorm = 0.6852, lr_0 = 6.0178e-04
Loss = 1.0233e-01, PNorm = 56.6797, GNorm = 1.2694, lr_0 = 6.0136e-04
Loss = 1.1360e-01, PNorm = 56.6927, GNorm = 0.6904, lr_0 = 6.0095e-04
Loss = 1.1428e-01, PNorm = 56.7061, GNorm = 1.0116, lr_0 = 6.0054e-04
Loss = 1.1793e-01, PNorm = 56.7201, GNorm = 1.1829, lr_0 = 6.0013e-04
Loss = 1.1352e-01, PNorm = 56.7330, GNorm = 0.5168, lr_0 = 5.9972e-04
Loss = 1.2566e-01, PNorm = 56.7540, GNorm = 1.2097, lr_0 = 5.9931e-04
Loss = 1.0724e-01, PNorm = 56.7754, GNorm = 0.5234, lr_0 = 5.9890e-04
Loss = 1.2098e-01, PNorm = 56.7941, GNorm = 0.9260, lr_0 = 5.9849e-04
Loss = 1.1722e-01, PNorm = 56.8065, GNorm = 0.6387, lr_0 = 5.9808e-04
Loss = 9.6964e-02, PNorm = 56.8121, GNorm = 0.5914, lr_0 = 5.9767e-04
Loss = 1.3439e-01, PNorm = 56.8272, GNorm = 0.8632, lr_0 = 5.9726e-04
Loss = 1.1991e-01, PNorm = 56.8455, GNorm = 1.7124, lr_0 = 5.9685e-04
Loss = 1.2124e-01, PNorm = 56.8593, GNorm = 1.4322, lr_0 = 5.9644e-04
Loss = 1.2855e-01, PNorm = 56.8787, GNorm = 1.7832, lr_0 = 5.9603e-04
Loss = 1.0803e-01, PNorm = 56.9045, GNorm = 1.0427, lr_0 = 5.9562e-04
Loss = 9.7552e-02, PNorm = 56.9288, GNorm = 0.7944, lr_0 = 5.9521e-04
Loss = 1.0045e-01, PNorm = 56.9481, GNorm = 0.7398, lr_0 = 5.9481e-04
Loss = 1.1186e-01, PNorm = 56.9602, GNorm = 0.6006, lr_0 = 5.9440e-04
Loss = 1.1073e-01, PNorm = 56.9760, GNorm = 1.4057, lr_0 = 5.9399e-04
Loss = 1.1484e-01, PNorm = 56.9909, GNorm = 1.0006, lr_0 = 5.9358e-04
Loss = 1.0725e-01, PNorm = 57.0062, GNorm = 0.9881, lr_0 = 5.9318e-04
Loss = 1.2538e-01, PNorm = 57.0216, GNorm = 0.9044, lr_0 = 5.9277e-04
Loss = 1.1109e-01, PNorm = 57.0357, GNorm = 0.7068, lr_0 = 5.9236e-04
Loss = 1.0580e-01, PNorm = 57.0512, GNorm = 0.6200, lr_0 = 5.9196e-04
Loss = 1.0643e-01, PNorm = 57.0689, GNorm = 0.8733, lr_0 = 5.9155e-04
Loss = 1.2387e-01, PNorm = 57.0882, GNorm = 0.7639, lr_0 = 5.9115e-04
Loss = 1.0985e-01, PNorm = 57.1082, GNorm = 0.7478, lr_0 = 5.9074e-04
Loss = 1.0173e-01, PNorm = 57.1266, GNorm = 0.4644, lr_0 = 5.9034e-04
Loss = 1.2500e-01, PNorm = 57.1414, GNorm = 1.1120, lr_0 = 5.8993e-04
Loss = 1.2531e-01, PNorm = 57.1558, GNorm = 1.2310, lr_0 = 5.8953e-04
Loss = 1.0871e-01, PNorm = 57.1686, GNorm = 1.0398, lr_0 = 5.8913e-04
Loss = 1.2212e-01, PNorm = 57.1861, GNorm = 0.7870, lr_0 = 5.8872e-04
Loss = 1.0595e-01, PNorm = 57.2031, GNorm = 0.7758, lr_0 = 5.8832e-04
Loss = 1.0500e-01, PNorm = 57.2143, GNorm = 0.7907, lr_0 = 5.8792e-04
Loss = 1.1543e-01, PNorm = 57.2204, GNorm = 0.9935, lr_0 = 5.8751e-04
Loss = 1.0538e-01, PNorm = 57.2266, GNorm = 1.0237, lr_0 = 5.8711e-04
Loss = 1.1301e-01, PNorm = 57.2452, GNorm = 0.4858, lr_0 = 5.8671e-04
Loss = 1.0307e-01, PNorm = 57.2630, GNorm = 1.1774, lr_0 = 5.8631e-04
Loss = 1.0862e-01, PNorm = 57.2767, GNorm = 0.5931, lr_0 = 5.8591e-04
Loss = 1.0886e-01, PNorm = 57.2930, GNorm = 0.6384, lr_0 = 5.8550e-04
Loss = 1.0233e-01, PNorm = 57.3104, GNorm = 0.5475, lr_0 = 5.8510e-04
Loss = 1.0201e-01, PNorm = 57.3208, GNorm = 0.8909, lr_0 = 5.8470e-04
Loss = 1.0932e-01, PNorm = 57.3360, GNorm = 1.1267, lr_0 = 5.8430e-04
Loss = 1.2807e-01, PNorm = 57.3518, GNorm = 0.9208, lr_0 = 5.8390e-04
Loss = 1.1384e-01, PNorm = 57.3658, GNorm = 0.7937, lr_0 = 5.8350e-04
Loss = 1.0491e-01, PNorm = 57.3872, GNorm = 1.4752, lr_0 = 5.8310e-04
Loss = 1.1500e-01, PNorm = 57.4042, GNorm = 0.7421, lr_0 = 5.8270e-04
Loss = 1.1670e-01, PNorm = 57.4178, GNorm = 0.9962, lr_0 = 5.8230e-04
Loss = 1.1900e-01, PNorm = 57.4304, GNorm = 0.4337, lr_0 = 5.8190e-04
Loss = 1.1488e-01, PNorm = 57.4439, GNorm = 1.3909, lr_0 = 5.8151e-04
Loss = 1.1242e-01, PNorm = 57.4613, GNorm = 0.7896, lr_0 = 5.8111e-04
Loss = 1.0861e-01, PNorm = 57.4746, GNorm = 0.9784, lr_0 = 5.8071e-04
Loss = 1.2398e-01, PNorm = 57.4881, GNorm = 0.9372, lr_0 = 5.8031e-04
Loss = 1.1078e-01, PNorm = 57.5022, GNorm = 1.1361, lr_0 = 5.7991e-04
Loss = 9.8881e-02, PNorm = 57.5126, GNorm = 0.7827, lr_0 = 5.7952e-04
Loss = 9.0020e-02, PNorm = 57.5241, GNorm = 0.8523, lr_0 = 5.7912e-04
Loss = 1.0615e-01, PNorm = 57.5363, GNorm = 0.8788, lr_0 = 5.7872e-04
Loss = 1.1868e-01, PNorm = 57.5485, GNorm = 1.0494, lr_0 = 5.7833e-04
Loss = 9.7400e-02, PNorm = 57.5605, GNorm = 0.6445, lr_0 = 5.7793e-04
Loss = 1.0544e-01, PNorm = 57.5723, GNorm = 0.6839, lr_0 = 5.7753e-04
Loss = 1.1256e-01, PNorm = 57.5831, GNorm = 0.7360, lr_0 = 5.7714e-04
Loss = 1.0115e-01, PNorm = 57.5877, GNorm = 0.9280, lr_0 = 5.7674e-04
Loss = 1.0443e-01, PNorm = 57.5949, GNorm = 1.3814, lr_0 = 5.7635e-04
Loss = 1.0241e-01, PNorm = 57.6053, GNorm = 0.6458, lr_0 = 5.7595e-04
Loss = 1.0620e-01, PNorm = 57.6183, GNorm = 1.2557, lr_0 = 5.7556e-04
Loss = 1.2622e-01, PNorm = 57.6282, GNorm = 0.6900, lr_0 = 5.7516e-04
Loss = 1.2302e-01, PNorm = 57.6409, GNorm = 0.8201, lr_0 = 5.7477e-04
Loss = 1.0696e-01, PNorm = 57.6565, GNorm = 0.6215, lr_0 = 5.7438e-04
Loss = 1.0997e-01, PNorm = 57.6722, GNorm = 0.6538, lr_0 = 5.7398e-04
Loss = 1.0014e-01, PNorm = 57.6870, GNorm = 0.7085, lr_0 = 5.7359e-04
Loss = 9.6968e-02, PNorm = 57.6989, GNorm = 0.6020, lr_0 = 5.7320e-04
Loss = 1.1283e-01, PNorm = 57.7171, GNorm = 0.5748, lr_0 = 5.7280e-04
Loss = 1.1897e-01, PNorm = 57.7292, GNorm = 1.1734, lr_0 = 5.7241e-04
Loss = 9.8082e-02, PNorm = 57.7356, GNorm = 0.5478, lr_0 = 5.7202e-04
Loss = 1.0962e-01, PNorm = 57.7472, GNorm = 0.8158, lr_0 = 5.7163e-04
Loss = 9.0571e-02, PNorm = 57.7594, GNorm = 0.7573, lr_0 = 5.7124e-04
Loss = 1.1446e-01, PNorm = 57.7786, GNorm = 1.2875, lr_0 = 5.7084e-04
Loss = 1.1394e-01, PNorm = 57.7997, GNorm = 1.5741, lr_0 = 5.7045e-04
Loss = 1.2325e-01, PNorm = 57.8248, GNorm = 0.6077, lr_0 = 5.7006e-04
Loss = 1.0607e-01, PNorm = 57.8410, GNorm = 0.9464, lr_0 = 5.6967e-04
Loss = 1.0382e-01, PNorm = 57.8540, GNorm = 0.6352, lr_0 = 5.6928e-04
Loss = 1.1060e-01, PNorm = 57.8645, GNorm = 0.9136, lr_0 = 5.6889e-04
Loss = 1.0800e-01, PNorm = 57.8747, GNorm = 0.7264, lr_0 = 5.6850e-04
Loss = 1.1019e-01, PNorm = 57.8855, GNorm = 1.1762, lr_0 = 5.6811e-04
Loss = 1.2588e-01, PNorm = 57.8963, GNorm = 0.6781, lr_0 = 5.6772e-04
Loss = 1.2001e-01, PNorm = 57.9088, GNorm = 0.7861, lr_0 = 5.6733e-04
Loss = 1.0654e-01, PNorm = 57.9233, GNorm = 0.9235, lr_0 = 5.6695e-04
Loss = 1.0876e-01, PNorm = 57.9351, GNorm = 0.5836, lr_0 = 5.6656e-04
Loss = 1.0995e-01, PNorm = 57.9457, GNorm = 0.9115, lr_0 = 5.6617e-04
Loss = 1.0353e-01, PNorm = 57.9528, GNorm = 0.6342, lr_0 = 5.6578e-04
Loss = 9.3987e-02, PNorm = 57.9595, GNorm = 0.6518, lr_0 = 5.6539e-04
Loss = 1.2097e-01, PNorm = 57.9735, GNorm = 0.7100, lr_0 = 5.6501e-04
Loss = 1.0528e-01, PNorm = 57.9879, GNorm = 1.3350, lr_0 = 5.6462e-04
Loss = 1.3174e-01, PNorm = 57.9997, GNorm = 0.9865, lr_0 = 5.6423e-04
Loss = 1.0788e-01, PNorm = 58.0153, GNorm = 0.5237, lr_0 = 5.6385e-04
Loss = 1.1199e-01, PNorm = 58.0327, GNorm = 0.6967, lr_0 = 5.6346e-04
Loss = 9.6646e-02, PNorm = 58.0494, GNorm = 1.0733, lr_0 = 5.6307e-04
Loss = 1.1815e-01, PNorm = 58.0619, GNorm = 0.8624, lr_0 = 5.6269e-04
Loss = 9.8410e-02, PNorm = 58.0747, GNorm = 0.6737, lr_0 = 5.6230e-04
Validation mae = 0.403616
Epoch 9
Loss = 9.8112e-02, PNorm = 58.0858, GNorm = 1.0693, lr_0 = 5.6192e-04
Loss = 8.9718e-02, PNorm = 58.1008, GNorm = 0.7069, lr_0 = 5.6153e-04
Loss = 8.0813e-02, PNorm = 58.1133, GNorm = 0.5149, lr_0 = 5.6115e-04
Loss = 9.4675e-02, PNorm = 58.1262, GNorm = 0.5510, lr_0 = 5.6076e-04
Loss = 8.7911e-02, PNorm = 58.1398, GNorm = 0.5401, lr_0 = 5.6038e-04
Loss = 1.0172e-01, PNorm = 58.1540, GNorm = 0.6404, lr_0 = 5.6000e-04
Loss = 1.0191e-01, PNorm = 58.1688, GNorm = 0.9207, lr_0 = 5.5961e-04
Loss = 1.0093e-01, PNorm = 58.1793, GNorm = 0.9290, lr_0 = 5.5923e-04
Loss = 1.0109e-01, PNorm = 58.1926, GNorm = 0.6479, lr_0 = 5.5885e-04
Loss = 1.0101e-01, PNorm = 58.2049, GNorm = 0.5880, lr_0 = 5.5846e-04
Loss = 1.0486e-01, PNorm = 58.2152, GNorm = 0.9528, lr_0 = 5.5808e-04
Loss = 9.4888e-02, PNorm = 58.2359, GNorm = 0.7131, lr_0 = 5.5770e-04
Loss = 1.0725e-01, PNorm = 58.2483, GNorm = 1.4861, lr_0 = 5.5732e-04
Loss = 1.0173e-01, PNorm = 58.2599, GNorm = 0.8281, lr_0 = 5.5693e-04
Loss = 1.0058e-01, PNorm = 58.2784, GNorm = 0.9463, lr_0 = 5.5655e-04
Loss = 1.0824e-01, PNorm = 58.3009, GNorm = 0.7967, lr_0 = 5.5617e-04
Loss = 9.3621e-02, PNorm = 58.3165, GNorm = 0.8741, lr_0 = 5.5579e-04
Loss = 8.3337e-02, PNorm = 58.3285, GNorm = 0.7136, lr_0 = 5.5541e-04
Loss = 1.0318e-01, PNorm = 58.3375, GNorm = 0.9291, lr_0 = 5.5503e-04
Loss = 9.9895e-02, PNorm = 58.3498, GNorm = 0.5603, lr_0 = 5.5465e-04
Loss = 9.7405e-02, PNorm = 58.3601, GNorm = 0.5480, lr_0 = 5.5427e-04
Loss = 1.0805e-01, PNorm = 58.3711, GNorm = 0.5723, lr_0 = 5.5389e-04
Loss = 1.1179e-01, PNorm = 58.3851, GNorm = 1.5585, lr_0 = 5.5351e-04
Loss = 1.0006e-01, PNorm = 58.4033, GNorm = 0.6071, lr_0 = 5.5313e-04
Loss = 1.1048e-01, PNorm = 58.4170, GNorm = 0.7971, lr_0 = 5.5275e-04
Loss = 1.1070e-01, PNorm = 58.4316, GNorm = 0.7295, lr_0 = 5.5237e-04
Loss = 1.0358e-01, PNorm = 58.4489, GNorm = 1.3282, lr_0 = 5.5199e-04
Loss = 1.0191e-01, PNorm = 58.4685, GNorm = 0.8601, lr_0 = 5.5162e-04
Loss = 8.9885e-02, PNorm = 58.4832, GNorm = 0.5198, lr_0 = 5.5124e-04
Loss = 1.1473e-01, PNorm = 58.4963, GNorm = 1.0896, lr_0 = 5.5086e-04
Loss = 9.7748e-02, PNorm = 58.5099, GNorm = 0.5974, lr_0 = 5.5048e-04
Loss = 1.0263e-01, PNorm = 58.5247, GNorm = 1.3596, lr_0 = 5.5011e-04
Loss = 8.7819e-02, PNorm = 58.5394, GNorm = 0.6348, lr_0 = 5.4973e-04
Loss = 1.0675e-01, PNorm = 58.5540, GNorm = 0.5614, lr_0 = 5.4935e-04
Loss = 8.8841e-02, PNorm = 58.5690, GNorm = 0.6606, lr_0 = 5.4898e-04
Loss = 9.4001e-02, PNorm = 58.5814, GNorm = 0.4264, lr_0 = 5.4860e-04
Loss = 9.0630e-02, PNorm = 58.5963, GNorm = 0.4850, lr_0 = 5.4822e-04
Loss = 9.8107e-02, PNorm = 58.6139, GNorm = 1.2528, lr_0 = 5.4785e-04
Loss = 9.5305e-02, PNorm = 58.6276, GNorm = 0.5841, lr_0 = 5.4747e-04
Loss = 8.8421e-02, PNorm = 58.6418, GNorm = 0.5780, lr_0 = 5.4710e-04
Loss = 1.1357e-01, PNorm = 58.6568, GNorm = 0.7749, lr_0 = 5.4672e-04
Loss = 9.8587e-02, PNorm = 58.6734, GNorm = 0.5934, lr_0 = 5.4635e-04
Loss = 1.2099e-01, PNorm = 58.6937, GNorm = 1.2583, lr_0 = 5.4597e-04
Loss = 1.0927e-01, PNorm = 58.7143, GNorm = 1.4714, lr_0 = 5.4560e-04
Loss = 1.1371e-01, PNorm = 58.7300, GNorm = 1.0939, lr_0 = 5.4523e-04
Loss = 1.0392e-01, PNorm = 58.7393, GNorm = 0.6881, lr_0 = 5.4485e-04
Loss = 9.2765e-02, PNorm = 58.7480, GNorm = 1.0111, lr_0 = 5.4448e-04
Loss = 9.9244e-02, PNorm = 58.7608, GNorm = 0.5195, lr_0 = 5.4411e-04
Loss = 9.3300e-02, PNorm = 58.7735, GNorm = 0.7791, lr_0 = 5.4373e-04
Loss = 1.0848e-01, PNorm = 58.7864, GNorm = 1.0207, lr_0 = 5.4336e-04
Loss = 1.0758e-01, PNorm = 58.8000, GNorm = 1.3567, lr_0 = 5.4299e-04
Loss = 1.1384e-01, PNorm = 58.8154, GNorm = 1.3694, lr_0 = 5.4262e-04
Loss = 1.2461e-01, PNorm = 58.8329, GNorm = 1.0481, lr_0 = 5.4225e-04
Loss = 9.9440e-02, PNorm = 58.8524, GNorm = 1.3055, lr_0 = 5.4187e-04
Loss = 9.4699e-02, PNorm = 58.8723, GNorm = 0.5892, lr_0 = 5.4150e-04
Loss = 9.8041e-02, PNorm = 58.8874, GNorm = 0.8685, lr_0 = 5.4113e-04
Loss = 1.0570e-01, PNorm = 58.9030, GNorm = 0.8007, lr_0 = 5.4076e-04
Loss = 1.0545e-01, PNorm = 58.9130, GNorm = 0.8495, lr_0 = 5.4039e-04
Loss = 9.0442e-02, PNorm = 58.9252, GNorm = 0.8847, lr_0 = 5.4002e-04
Loss = 1.0585e-01, PNorm = 58.9372, GNorm = 0.7992, lr_0 = 5.3965e-04
Loss = 1.0401e-01, PNorm = 58.9485, GNorm = 0.7294, lr_0 = 5.3928e-04
Loss = 1.0373e-01, PNorm = 58.9671, GNorm = 0.4557, lr_0 = 5.3891e-04
Loss = 1.0270e-01, PNorm = 58.9843, GNorm = 0.6979, lr_0 = 5.3854e-04
Loss = 9.8802e-02, PNorm = 58.9996, GNorm = 0.5987, lr_0 = 5.3817e-04
Loss = 9.5754e-02, PNorm = 59.0126, GNorm = 1.0750, lr_0 = 5.3781e-04
Loss = 1.2000e-01, PNorm = 59.0273, GNorm = 0.6131, lr_0 = 5.3744e-04
Loss = 9.5946e-02, PNorm = 59.0404, GNorm = 0.5330, lr_0 = 5.3707e-04
Loss = 1.0367e-01, PNorm = 59.0492, GNorm = 1.1885, lr_0 = 5.3670e-04
Loss = 1.0417e-01, PNorm = 59.0653, GNorm = 0.5587, lr_0 = 5.3633e-04
Loss = 1.0585e-01, PNorm = 59.0873, GNorm = 0.5968, lr_0 = 5.3597e-04
Loss = 1.0418e-01, PNorm = 59.1069, GNorm = 1.2790, lr_0 = 5.3560e-04
Loss = 1.0692e-01, PNorm = 59.1220, GNorm = 0.4823, lr_0 = 5.3523e-04
Loss = 1.1589e-01, PNorm = 59.1334, GNorm = 1.4753, lr_0 = 5.3486e-04
Loss = 9.9204e-02, PNorm = 59.1433, GNorm = 0.6565, lr_0 = 5.3450e-04
Loss = 9.9243e-02, PNorm = 59.1570, GNorm = 0.9178, lr_0 = 5.3413e-04
Loss = 1.0901e-01, PNorm = 59.1705, GNorm = 1.4575, lr_0 = 5.3377e-04
Loss = 9.6514e-02, PNorm = 59.1867, GNorm = 0.7256, lr_0 = 5.3340e-04
Loss = 1.0585e-01, PNorm = 59.2043, GNorm = 0.8927, lr_0 = 5.3304e-04
Loss = 9.7162e-02, PNorm = 59.2210, GNorm = 0.9719, lr_0 = 5.3267e-04
Loss = 1.4052e-01, PNorm = 59.2378, GNorm = 1.8377, lr_0 = 5.3231e-04
Loss = 1.2369e-01, PNorm = 59.2544, GNorm = 1.5915, lr_0 = 5.3194e-04
Loss = 1.0994e-01, PNorm = 59.2707, GNorm = 1.1205, lr_0 = 5.3158e-04
Loss = 1.0414e-01, PNorm = 59.2878, GNorm = 1.2421, lr_0 = 5.3121e-04
Loss = 9.1681e-02, PNorm = 59.2969, GNorm = 0.9810, lr_0 = 5.3085e-04
Loss = 1.0210e-01, PNorm = 59.3078, GNorm = 0.9672, lr_0 = 5.3048e-04
Loss = 9.2885e-02, PNorm = 59.3220, GNorm = 0.6935, lr_0 = 5.3012e-04
Loss = 1.0944e-01, PNorm = 59.3314, GNorm = 0.9378, lr_0 = 5.2976e-04
Loss = 1.0441e-01, PNorm = 59.3466, GNorm = 0.6232, lr_0 = 5.2939e-04
Loss = 8.5810e-02, PNorm = 59.3564, GNorm = 0.6440, lr_0 = 5.2903e-04
Loss = 1.1467e-01, PNorm = 59.3678, GNorm = 0.7849, lr_0 = 5.2867e-04
Loss = 1.0212e-01, PNorm = 59.3833, GNorm = 0.8742, lr_0 = 5.2831e-04
Loss = 1.0595e-01, PNorm = 59.3921, GNorm = 0.8286, lr_0 = 5.2795e-04
Loss = 1.0421e-01, PNorm = 59.4005, GNorm = 0.4581, lr_0 = 5.2758e-04
Loss = 9.2643e-02, PNorm = 59.4134, GNorm = 0.9992, lr_0 = 5.2722e-04
Loss = 9.7495e-02, PNorm = 59.4282, GNorm = 0.7320, lr_0 = 5.2686e-04
Loss = 1.1144e-01, PNorm = 59.4437, GNorm = 1.0364, lr_0 = 5.2650e-04
Loss = 8.6787e-02, PNorm = 59.4586, GNorm = 0.7697, lr_0 = 5.2614e-04
Loss = 9.1639e-02, PNorm = 59.4703, GNorm = 0.5478, lr_0 = 5.2578e-04
Loss = 1.0771e-01, PNorm = 59.4793, GNorm = 1.7653, lr_0 = 5.2542e-04
Loss = 8.5986e-02, PNorm = 59.4873, GNorm = 0.7698, lr_0 = 5.2506e-04
Loss = 1.0681e-01, PNorm = 59.4963, GNorm = 0.4978, lr_0 = 5.2470e-04
Loss = 9.8365e-02, PNorm = 59.5057, GNorm = 0.8934, lr_0 = 5.2434e-04
Loss = 8.3018e-02, PNorm = 59.5183, GNorm = 0.5999, lr_0 = 5.2398e-04
Loss = 8.6054e-02, PNorm = 59.5280, GNorm = 0.5231, lr_0 = 5.2362e-04
Loss = 1.0069e-01, PNorm = 59.5385, GNorm = 1.0898, lr_0 = 5.2326e-04
Loss = 9.9127e-02, PNorm = 59.5499, GNorm = 0.5766, lr_0 = 5.2290e-04
Loss = 1.0638e-01, PNorm = 59.5614, GNorm = 0.9349, lr_0 = 5.2255e-04
Loss = 1.0910e-01, PNorm = 59.5732, GNorm = 1.2139, lr_0 = 5.2219e-04
Loss = 1.1723e-01, PNorm = 59.5866, GNorm = 1.0553, lr_0 = 5.2183e-04
Loss = 8.3906e-02, PNorm = 59.6021, GNorm = 0.6726, lr_0 = 5.2147e-04
Loss = 9.1645e-02, PNorm = 59.6163, GNorm = 0.5873, lr_0 = 5.2112e-04
Loss = 1.0240e-01, PNorm = 59.6278, GNorm = 0.8047, lr_0 = 5.2076e-04
Loss = 9.6402e-02, PNorm = 59.6357, GNorm = 0.7086, lr_0 = 5.2040e-04
Loss = 9.6277e-02, PNorm = 59.6463, GNorm = 0.8687, lr_0 = 5.2005e-04
Loss = 1.1783e-01, PNorm = 59.6576, GNorm = 0.5477, lr_0 = 5.1969e-04
Loss = 1.0020e-01, PNorm = 59.6695, GNorm = 0.6121, lr_0 = 5.1933e-04
Loss = 9.9717e-02, PNorm = 59.6862, GNorm = 0.7233, lr_0 = 5.1898e-04
Loss = 1.3082e-01, PNorm = 59.6984, GNorm = 0.5894, lr_0 = 5.1862e-04
Loss = 1.0297e-01, PNorm = 59.7118, GNorm = 0.6237, lr_0 = 5.1827e-04
Loss = 9.5293e-02, PNorm = 59.7224, GNorm = 1.1626, lr_0 = 5.1791e-04
Validation mae = 0.416026
Epoch 10
Loss = 8.5670e-02, PNorm = 59.7351, GNorm = 1.2475, lr_0 = 5.1756e-04
Loss = 8.6461e-02, PNorm = 59.7495, GNorm = 0.5728, lr_0 = 5.1720e-04
Loss = 8.7794e-02, PNorm = 59.7601, GNorm = 0.6054, lr_0 = 5.1685e-04
Loss = 8.9104e-02, PNorm = 59.7688, GNorm = 0.6693, lr_0 = 5.1649e-04
Loss = 8.7776e-02, PNorm = 59.7770, GNorm = 0.5086, lr_0 = 5.1614e-04
Loss = 9.3702e-02, PNorm = 59.7859, GNorm = 0.5858, lr_0 = 5.1579e-04
Loss = 9.7233e-02, PNorm = 59.7993, GNorm = 0.6330, lr_0 = 5.1543e-04
Loss = 9.7766e-02, PNorm = 59.8155, GNorm = 1.1199, lr_0 = 5.1508e-04
Loss = 7.9865e-02, PNorm = 59.8281, GNorm = 0.6811, lr_0 = 5.1473e-04
Loss = 8.9359e-02, PNorm = 59.8395, GNorm = 0.6472, lr_0 = 5.1437e-04
Loss = 9.6375e-02, PNorm = 59.8484, GNorm = 0.7590, lr_0 = 5.1402e-04
Loss = 9.5420e-02, PNorm = 59.8601, GNorm = 0.8215, lr_0 = 5.1367e-04
Loss = 9.4301e-02, PNorm = 59.8730, GNorm = 0.9578, lr_0 = 5.1332e-04
Loss = 9.2079e-02, PNorm = 59.8848, GNorm = 0.6808, lr_0 = 5.1297e-04
Loss = 8.1150e-02, PNorm = 59.8984, GNorm = 0.6349, lr_0 = 5.1262e-04
Loss = 9.9358e-02, PNorm = 59.9144, GNorm = 0.7162, lr_0 = 5.1226e-04
Loss = 8.2270e-02, PNorm = 59.9330, GNorm = 0.4140, lr_0 = 5.1191e-04
Loss = 9.6410e-02, PNorm = 59.9468, GNorm = 1.0920, lr_0 = 5.1156e-04
Loss = 1.0111e-01, PNorm = 59.9607, GNorm = 0.5004, lr_0 = 5.1121e-04
Loss = 8.6284e-02, PNorm = 59.9758, GNorm = 0.8138, lr_0 = 5.1086e-04
Loss = 1.0415e-01, PNorm = 59.9977, GNorm = 0.6303, lr_0 = 5.1051e-04
Loss = 9.1970e-02, PNorm = 60.0107, GNorm = 0.7466, lr_0 = 5.1016e-04
Loss = 1.0652e-01, PNorm = 60.0229, GNorm = 0.8703, lr_0 = 5.0981e-04
Loss = 1.0255e-01, PNorm = 60.0430, GNorm = 1.0956, lr_0 = 5.0946e-04
Loss = 8.5553e-02, PNorm = 60.0586, GNorm = 0.5718, lr_0 = 5.0911e-04
Loss = 9.5271e-02, PNorm = 60.0716, GNorm = 0.8821, lr_0 = 5.0877e-04
Loss = 8.2732e-02, PNorm = 60.0840, GNorm = 0.6388, lr_0 = 5.0842e-04
Loss = 8.9095e-02, PNorm = 60.0975, GNorm = 0.5756, lr_0 = 5.0807e-04
Loss = 8.6747e-02, PNorm = 60.1101, GNorm = 0.8203, lr_0 = 5.0772e-04
Loss = 9.3748e-02, PNorm = 60.1213, GNorm = 1.3210, lr_0 = 5.0737e-04
Loss = 9.8284e-02, PNorm = 60.1319, GNorm = 0.9078, lr_0 = 5.0703e-04
Loss = 1.0549e-01, PNorm = 60.1462, GNorm = 1.1530, lr_0 = 5.0668e-04
Loss = 8.4496e-02, PNorm = 60.1564, GNorm = 0.5215, lr_0 = 5.0633e-04
Loss = 9.1712e-02, PNorm = 60.1654, GNorm = 0.6010, lr_0 = 5.0598e-04
Loss = 8.6257e-02, PNorm = 60.1780, GNorm = 1.2192, lr_0 = 5.0564e-04
Loss = 1.0013e-01, PNorm = 60.1920, GNorm = 0.5774, lr_0 = 5.0529e-04
Loss = 1.0854e-01, PNorm = 60.2065, GNorm = 1.4387, lr_0 = 5.0494e-04
Loss = 8.3800e-02, PNorm = 60.2191, GNorm = 1.0288, lr_0 = 5.0460e-04
Loss = 1.0340e-01, PNorm = 60.2299, GNorm = 0.7029, lr_0 = 5.0425e-04
Loss = 1.0320e-01, PNorm = 60.2395, GNorm = 0.5155, lr_0 = 5.0391e-04
Loss = 9.8115e-02, PNorm = 60.2524, GNorm = 0.7098, lr_0 = 5.0356e-04
Loss = 9.4030e-02, PNorm = 60.2672, GNorm = 0.5608, lr_0 = 5.0322e-04
Loss = 8.4962e-02, PNorm = 60.2791, GNorm = 0.4941, lr_0 = 5.0287e-04
Loss = 8.1008e-02, PNorm = 60.2844, GNorm = 0.7116, lr_0 = 5.0253e-04
Loss = 8.7906e-02, PNorm = 60.2911, GNorm = 1.0330, lr_0 = 5.0218e-04
Loss = 1.0320e-01, PNorm = 60.3018, GNorm = 0.5374, lr_0 = 5.0184e-04
Loss = 8.8072e-02, PNorm = 60.3124, GNorm = 1.0761, lr_0 = 5.0150e-04
Loss = 9.3994e-02, PNorm = 60.3225, GNorm = 0.5865, lr_0 = 5.0115e-04
Loss = 7.9376e-02, PNorm = 60.3307, GNorm = 0.8761, lr_0 = 5.0081e-04
Loss = 9.6391e-02, PNorm = 60.3415, GNorm = 0.8122, lr_0 = 5.0047e-04
Loss = 1.2114e-01, PNorm = 60.3545, GNorm = 1.0812, lr_0 = 5.0012e-04
Loss = 1.0300e-01, PNorm = 60.3642, GNorm = 1.5547, lr_0 = 4.9978e-04
Loss = 9.4275e-02, PNorm = 60.3790, GNorm = 0.6702, lr_0 = 4.9944e-04
Loss = 9.7639e-02, PNorm = 60.3965, GNorm = 1.1327, lr_0 = 4.9910e-04
Loss = 1.0172e-01, PNorm = 60.4135, GNorm = 0.6206, lr_0 = 4.9875e-04
Loss = 9.8166e-02, PNorm = 60.4286, GNorm = 1.5947, lr_0 = 4.9841e-04
Loss = 9.5736e-02, PNorm = 60.4459, GNorm = 0.6185, lr_0 = 4.9807e-04
Loss = 9.9830e-02, PNorm = 60.4634, GNorm = 0.5981, lr_0 = 4.9773e-04
Loss = 8.7907e-02, PNorm = 60.4775, GNorm = 0.4870, lr_0 = 4.9739e-04
Loss = 1.0017e-01, PNorm = 60.4864, GNorm = 0.9466, lr_0 = 4.9705e-04
Loss = 1.0478e-01, PNorm = 60.4911, GNorm = 0.6565, lr_0 = 4.9671e-04
Loss = 9.4686e-02, PNorm = 60.5023, GNorm = 0.6929, lr_0 = 4.9637e-04
Loss = 8.4989e-02, PNorm = 60.5147, GNorm = 1.0585, lr_0 = 4.9603e-04
Loss = 7.8466e-02, PNorm = 60.5258, GNorm = 0.5336, lr_0 = 4.9569e-04
Loss = 9.2996e-02, PNorm = 60.5418, GNorm = 0.5248, lr_0 = 4.9535e-04
Loss = 1.0346e-01, PNorm = 60.5597, GNorm = 0.6790, lr_0 = 4.9501e-04
Loss = 1.0331e-01, PNorm = 60.5732, GNorm = 0.7122, lr_0 = 4.9467e-04
Loss = 9.5768e-02, PNorm = 60.5837, GNorm = 1.1215, lr_0 = 4.9433e-04
Loss = 9.9583e-02, PNorm = 60.5968, GNorm = 0.9807, lr_0 = 4.9399e-04
Loss = 9.1752e-02, PNorm = 60.6118, GNorm = 0.5669, lr_0 = 4.9365e-04
Loss = 8.7593e-02, PNorm = 60.6201, GNorm = 0.4979, lr_0 = 4.9332e-04
Loss = 9.6343e-02, PNorm = 60.6286, GNorm = 1.0587, lr_0 = 4.9298e-04
Loss = 1.0165e-01, PNorm = 60.6377, GNorm = 0.6959, lr_0 = 4.9264e-04
Loss = 8.1901e-02, PNorm = 60.6478, GNorm = 0.6095, lr_0 = 4.9230e-04
Loss = 9.1724e-02, PNorm = 60.6590, GNorm = 1.1639, lr_0 = 4.9197e-04
Loss = 9.5134e-02, PNorm = 60.6744, GNorm = 0.6340, lr_0 = 4.9163e-04
Loss = 8.6831e-02, PNorm = 60.6867, GNorm = 0.4749, lr_0 = 4.9129e-04
Loss = 8.5149e-02, PNorm = 60.6950, GNorm = 0.4572, lr_0 = 4.9095e-04
Loss = 1.0166e-01, PNorm = 60.7034, GNorm = 0.7514, lr_0 = 4.9062e-04
Loss = 1.0160e-01, PNorm = 60.7130, GNorm = 0.5659, lr_0 = 4.9028e-04
Loss = 7.9460e-02, PNorm = 60.7212, GNorm = 0.6552, lr_0 = 4.8995e-04
Loss = 9.6135e-02, PNorm = 60.7291, GNorm = 0.6564, lr_0 = 4.8961e-04
Loss = 8.7102e-02, PNorm = 60.7430, GNorm = 1.0019, lr_0 = 4.8928e-04
Loss = 1.0648e-01, PNorm = 60.7560, GNorm = 0.6949, lr_0 = 4.8894e-04
Loss = 8.3146e-02, PNorm = 60.7717, GNorm = 0.6748, lr_0 = 4.8861e-04
Loss = 9.6268e-02, PNorm = 60.7836, GNorm = 0.7158, lr_0 = 4.8827e-04
Loss = 8.5111e-02, PNorm = 60.7940, GNorm = 0.9275, lr_0 = 4.8794e-04
Loss = 1.0586e-01, PNorm = 60.8074, GNorm = 1.0674, lr_0 = 4.8760e-04
Loss = 9.1644e-02, PNorm = 60.8194, GNorm = 0.6770, lr_0 = 4.8727e-04
Loss = 1.1150e-01, PNorm = 60.8304, GNorm = 0.7016, lr_0 = 4.8693e-04
Loss = 9.2263e-02, PNorm = 60.8437, GNorm = 0.8145, lr_0 = 4.8660e-04
Loss = 8.6406e-02, PNorm = 60.8549, GNorm = 0.6254, lr_0 = 4.8627e-04
Loss = 9.6994e-02, PNorm = 60.8630, GNorm = 0.8583, lr_0 = 4.8593e-04
Loss = 9.5033e-02, PNorm = 60.8765, GNorm = 0.5465, lr_0 = 4.8560e-04
Loss = 8.5010e-02, PNorm = 60.8889, GNorm = 0.6713, lr_0 = 4.8527e-04
Loss = 1.0292e-01, PNorm = 60.8975, GNorm = 0.9248, lr_0 = 4.8494e-04
Loss = 1.0243e-01, PNorm = 60.9072, GNorm = 1.0156, lr_0 = 4.8460e-04
Loss = 9.5609e-02, PNorm = 60.9182, GNorm = 0.7978, lr_0 = 4.8427e-04
Loss = 9.0827e-02, PNorm = 60.9261, GNorm = 1.2507, lr_0 = 4.8394e-04
Loss = 1.0336e-01, PNorm = 60.9388, GNorm = 0.5547, lr_0 = 4.8361e-04
Loss = 1.1429e-01, PNorm = 60.9516, GNorm = 0.5853, lr_0 = 4.8328e-04
Loss = 1.0444e-01, PNorm = 60.9608, GNorm = 0.6238, lr_0 = 4.8295e-04
Loss = 1.0407e-01, PNorm = 60.9721, GNorm = 0.7705, lr_0 = 4.8262e-04
Loss = 8.8417e-02, PNorm = 60.9847, GNorm = 0.5831, lr_0 = 4.8228e-04
Loss = 8.7906e-02, PNorm = 60.9943, GNorm = 1.0920, lr_0 = 4.8195e-04
Loss = 1.0675e-01, PNorm = 61.0083, GNorm = 0.7973, lr_0 = 4.8162e-04
Loss = 1.0330e-01, PNorm = 61.0172, GNorm = 0.6502, lr_0 = 4.8129e-04
Loss = 1.0662e-01, PNorm = 61.0257, GNorm = 0.6871, lr_0 = 4.8096e-04
Loss = 9.6889e-02, PNorm = 61.0371, GNorm = 0.9162, lr_0 = 4.8064e-04
Loss = 9.9274e-02, PNorm = 61.0501, GNorm = 1.2787, lr_0 = 4.8031e-04
Loss = 1.0339e-01, PNorm = 61.0668, GNorm = 0.8572, lr_0 = 4.7998e-04
Loss = 8.8879e-02, PNorm = 61.0827, GNorm = 0.6278, lr_0 = 4.7965e-04
Loss = 9.6517e-02, PNorm = 61.0955, GNorm = 0.3987, lr_0 = 4.7932e-04
Loss = 9.8130e-02, PNorm = 61.1079, GNorm = 0.6298, lr_0 = 4.7899e-04
Loss = 9.6153e-02, PNorm = 61.1178, GNorm = 0.8794, lr_0 = 4.7866e-04
Loss = 8.7199e-02, PNorm = 61.1229, GNorm = 0.9427, lr_0 = 4.7833e-04
Loss = 9.5845e-02, PNorm = 61.1298, GNorm = 0.6182, lr_0 = 4.7801e-04
Loss = 9.2902e-02, PNorm = 61.1396, GNorm = 1.3921, lr_0 = 4.7768e-04
Loss = 9.1325e-02, PNorm = 61.1521, GNorm = 0.7891, lr_0 = 4.7735e-04
Loss = 8.2031e-02, PNorm = 61.1591, GNorm = 1.3214, lr_0 = 4.7703e-04
Validation mae = 0.414039
Epoch 11
Loss = 7.5799e-02, PNorm = 61.1675, GNorm = 0.8069, lr_0 = 4.7670e-04
Loss = 7.8642e-02, PNorm = 61.1758, GNorm = 1.2065, lr_0 = 4.7637e-04
Loss = 7.7884e-02, PNorm = 61.1828, GNorm = 0.9602, lr_0 = 4.7605e-04
Loss = 8.5616e-02, PNorm = 61.1935, GNorm = 0.6385, lr_0 = 4.7572e-04
Loss = 9.6933e-02, PNorm = 61.2064, GNorm = 1.2285, lr_0 = 4.7539e-04
Loss = 8.1331e-02, PNorm = 61.2188, GNorm = 0.5821, lr_0 = 4.7507e-04
Loss = 8.0792e-02, PNorm = 61.2324, GNorm = 0.7089, lr_0 = 4.7474e-04
Loss = 8.3279e-02, PNorm = 61.2485, GNorm = 0.6466, lr_0 = 4.7442e-04
Loss = 7.0113e-02, PNorm = 61.2629, GNorm = 0.3812, lr_0 = 4.7409e-04
Loss = 9.0896e-02, PNorm = 61.2751, GNorm = 0.9211, lr_0 = 4.7377e-04
Loss = 9.2969e-02, PNorm = 61.2870, GNorm = 1.2711, lr_0 = 4.7344e-04
Loss = 9.0678e-02, PNorm = 61.2986, GNorm = 0.7205, lr_0 = 4.7312e-04
Loss = 8.5564e-02, PNorm = 61.3112, GNorm = 1.0100, lr_0 = 4.7279e-04
Loss = 8.8466e-02, PNorm = 61.3271, GNorm = 0.5745, lr_0 = 4.7247e-04
Loss = 8.4954e-02, PNorm = 61.3388, GNorm = 0.7751, lr_0 = 4.7215e-04
Loss = 7.4505e-02, PNorm = 61.3502, GNorm = 0.7655, lr_0 = 4.7182e-04
Loss = 7.9554e-02, PNorm = 61.3569, GNorm = 0.3991, lr_0 = 4.7150e-04
Loss = 7.7837e-02, PNorm = 61.3689, GNorm = 0.6052, lr_0 = 4.7118e-04
Loss = 9.4023e-02, PNorm = 61.3810, GNorm = 0.5232, lr_0 = 4.7085e-04
Loss = 9.1138e-02, PNorm = 61.3945, GNorm = 1.1181, lr_0 = 4.7053e-04
Loss = 9.2852e-02, PNorm = 61.4076, GNorm = 0.6608, lr_0 = 4.7021e-04
Loss = 8.8428e-02, PNorm = 61.4166, GNorm = 0.7778, lr_0 = 4.6989e-04
Loss = 7.6072e-02, PNorm = 61.4255, GNorm = 0.7325, lr_0 = 4.6957e-04
Loss = 1.0485e-01, PNorm = 61.4422, GNorm = 1.1177, lr_0 = 4.6924e-04
Loss = 9.3465e-02, PNorm = 61.4579, GNorm = 0.5864, lr_0 = 4.6892e-04
Loss = 8.8480e-02, PNorm = 61.4716, GNorm = 0.8191, lr_0 = 4.6860e-04
Loss = 1.0259e-01, PNorm = 61.4796, GNorm = 0.8110, lr_0 = 4.6828e-04
Loss = 9.7247e-02, PNorm = 61.4882, GNorm = 0.6185, lr_0 = 4.6796e-04
Loss = 9.3062e-02, PNorm = 61.5051, GNorm = 0.5109, lr_0 = 4.6764e-04
Loss = 8.8330e-02, PNorm = 61.5171, GNorm = 0.6995, lr_0 = 4.6732e-04
Loss = 8.2992e-02, PNorm = 61.5288, GNorm = 0.5370, lr_0 = 4.6700e-04
Loss = 7.8800e-02, PNorm = 61.5393, GNorm = 0.5341, lr_0 = 4.6668e-04
Loss = 7.6961e-02, PNorm = 61.5542, GNorm = 0.6035, lr_0 = 4.6636e-04
Loss = 9.5505e-02, PNorm = 61.5668, GNorm = 0.5514, lr_0 = 4.6604e-04
Loss = 8.9973e-02, PNorm = 61.5761, GNorm = 0.8128, lr_0 = 4.6572e-04
Loss = 8.0379e-02, PNorm = 61.5848, GNorm = 0.5814, lr_0 = 4.6540e-04
Loss = 7.8756e-02, PNorm = 61.5952, GNorm = 0.8811, lr_0 = 4.6508e-04
Loss = 8.0943e-02, PNorm = 61.6074, GNorm = 0.7021, lr_0 = 4.6476e-04
Loss = 8.5023e-02, PNorm = 61.6186, GNorm = 0.9154, lr_0 = 4.6445e-04
Loss = 9.0278e-02, PNorm = 61.6290, GNorm = 0.6744, lr_0 = 4.6413e-04
Loss = 8.7857e-02, PNorm = 61.6410, GNorm = 0.7506, lr_0 = 4.6381e-04
Loss = 8.4678e-02, PNorm = 61.6494, GNorm = 0.6408, lr_0 = 4.6349e-04
Loss = 8.6565e-02, PNorm = 61.6602, GNorm = 0.6520, lr_0 = 4.6317e-04
Loss = 9.7383e-02, PNorm = 61.6718, GNorm = 0.7415, lr_0 = 4.6286e-04
Loss = 8.9467e-02, PNorm = 61.6857, GNorm = 0.5169, lr_0 = 4.6254e-04
Loss = 8.8846e-02, PNorm = 61.6967, GNorm = 0.6347, lr_0 = 4.6222e-04
Loss = 8.2656e-02, PNorm = 61.7046, GNorm = 0.6556, lr_0 = 4.6191e-04
Loss = 9.7286e-02, PNorm = 61.7159, GNorm = 1.0343, lr_0 = 4.6159e-04
Loss = 8.8850e-02, PNorm = 61.7296, GNorm = 1.0512, lr_0 = 4.6127e-04
Loss = 8.8444e-02, PNorm = 61.7424, GNorm = 1.0209, lr_0 = 4.6096e-04
Loss = 8.5650e-02, PNorm = 61.7551, GNorm = 0.4880, lr_0 = 4.6064e-04
Loss = 8.9521e-02, PNorm = 61.7670, GNorm = 0.6197, lr_0 = 4.6033e-04
Loss = 8.6594e-02, PNorm = 61.7798, GNorm = 0.5414, lr_0 = 4.6001e-04
Loss = 8.2899e-02, PNorm = 61.7899, GNorm = 0.4528, lr_0 = 4.5970e-04
Loss = 8.3999e-02, PNorm = 61.8006, GNorm = 0.6084, lr_0 = 4.5938e-04
Loss = 8.7467e-02, PNorm = 61.8074, GNorm = 0.5938, lr_0 = 4.5907e-04
Loss = 7.0861e-02, PNorm = 61.8166, GNorm = 0.4379, lr_0 = 4.5875e-04
Loss = 7.7916e-02, PNorm = 61.8301, GNorm = 0.7313, lr_0 = 4.5844e-04
Loss = 8.1518e-02, PNorm = 61.8412, GNorm = 0.5379, lr_0 = 4.5812e-04
Loss = 8.2639e-02, PNorm = 61.8486, GNorm = 0.8109, lr_0 = 4.5781e-04
Loss = 8.9590e-02, PNorm = 61.8576, GNorm = 0.6420, lr_0 = 4.5750e-04
Loss = 1.0246e-01, PNorm = 61.8686, GNorm = 0.8111, lr_0 = 4.5718e-04
Loss = 9.0896e-02, PNorm = 61.8827, GNorm = 0.8233, lr_0 = 4.5687e-04
Loss = 8.1667e-02, PNorm = 61.8960, GNorm = 0.5772, lr_0 = 4.5656e-04
Loss = 7.9666e-02, PNorm = 61.9053, GNorm = 0.5959, lr_0 = 4.5624e-04
Loss = 7.9069e-02, PNorm = 61.9142, GNorm = 0.8178, lr_0 = 4.5593e-04
Loss = 8.3813e-02, PNorm = 61.9251, GNorm = 0.9461, lr_0 = 4.5562e-04
Loss = 9.9329e-02, PNorm = 61.9367, GNorm = 0.5684, lr_0 = 4.5531e-04
Loss = 8.8167e-02, PNorm = 61.9441, GNorm = 0.6503, lr_0 = 4.5499e-04
Loss = 8.9922e-02, PNorm = 61.9542, GNorm = 0.5877, lr_0 = 4.5468e-04
Loss = 8.1580e-02, PNorm = 61.9629, GNorm = 0.4679, lr_0 = 4.5437e-04
Loss = 9.0876e-02, PNorm = 61.9715, GNorm = 0.6248, lr_0 = 4.5406e-04
Loss = 8.9301e-02, PNorm = 61.9831, GNorm = 0.6493, lr_0 = 4.5375e-04
Loss = 8.7293e-02, PNorm = 61.9957, GNorm = 0.6297, lr_0 = 4.5344e-04
Loss = 9.1979e-02, PNorm = 62.0094, GNorm = 0.7805, lr_0 = 4.5313e-04
Loss = 9.4465e-02, PNorm = 62.0195, GNorm = 0.6687, lr_0 = 4.5282e-04
Loss = 8.6945e-02, PNorm = 62.0314, GNorm = 0.8486, lr_0 = 4.5251e-04
Loss = 9.3025e-02, PNorm = 62.0366, GNorm = 0.8052, lr_0 = 4.5220e-04
Loss = 9.1878e-02, PNorm = 62.0438, GNorm = 0.5165, lr_0 = 4.5189e-04
Loss = 9.2277e-02, PNorm = 62.0605, GNorm = 0.8790, lr_0 = 4.5158e-04
Loss = 8.1198e-02, PNorm = 62.0742, GNorm = 0.8854, lr_0 = 4.5127e-04
Loss = 9.7064e-02, PNorm = 62.0835, GNorm = 1.4637, lr_0 = 4.5096e-04
Loss = 1.0548e-01, PNorm = 62.0936, GNorm = 0.5262, lr_0 = 4.5065e-04
Loss = 1.0084e-01, PNorm = 62.1060, GNorm = 1.1775, lr_0 = 4.5034e-04
Loss = 9.5604e-02, PNorm = 62.1199, GNorm = 0.9226, lr_0 = 4.5003e-04
Loss = 8.4999e-02, PNorm = 62.1365, GNorm = 1.4992, lr_0 = 4.4972e-04
Loss = 8.6884e-02, PNorm = 62.1470, GNorm = 1.0233, lr_0 = 4.4942e-04
Loss = 1.1506e-01, PNorm = 62.1585, GNorm = 0.9839, lr_0 = 4.4911e-04
Loss = 9.6640e-02, PNorm = 62.1695, GNorm = 0.8819, lr_0 = 4.4880e-04
Loss = 9.0790e-02, PNorm = 62.1810, GNorm = 0.6796, lr_0 = 4.4849e-04
Loss = 8.2320e-02, PNorm = 62.1869, GNorm = 0.8051, lr_0 = 4.4819e-04
Loss = 8.5106e-02, PNorm = 62.1946, GNorm = 0.9908, lr_0 = 4.4788e-04
Loss = 9.6304e-02, PNorm = 62.2052, GNorm = 1.6191, lr_0 = 4.4757e-04
Loss = 7.3314e-02, PNorm = 62.2149, GNorm = 0.8797, lr_0 = 4.4727e-04
Loss = 8.2368e-02, PNorm = 62.2242, GNorm = 0.4770, lr_0 = 4.4696e-04
Loss = 9.5192e-02, PNorm = 62.2309, GNorm = 0.6275, lr_0 = 4.4665e-04
Loss = 9.2869e-02, PNorm = 62.2411, GNorm = 0.8999, lr_0 = 4.4635e-04
Loss = 9.5622e-02, PNorm = 62.2539, GNorm = 0.8094, lr_0 = 4.4604e-04
Loss = 8.5856e-02, PNorm = 62.2677, GNorm = 0.6506, lr_0 = 4.4574e-04
Loss = 7.9987e-02, PNorm = 62.2817, GNorm = 0.4980, lr_0 = 4.4543e-04
Loss = 9.1468e-02, PNorm = 62.2916, GNorm = 1.0228, lr_0 = 4.4513e-04
Loss = 8.7950e-02, PNorm = 62.2946, GNorm = 1.7834, lr_0 = 4.4482e-04
Loss = 8.2310e-02, PNorm = 62.3006, GNorm = 1.8094, lr_0 = 4.4452e-04
Loss = 7.9967e-02, PNorm = 62.3083, GNorm = 0.4746, lr_0 = 4.4421e-04
Loss = 8.4382e-02, PNorm = 62.3180, GNorm = 0.6774, lr_0 = 4.4391e-04
Loss = 8.0086e-02, PNorm = 62.3296, GNorm = 0.5829, lr_0 = 4.4360e-04
Loss = 7.9456e-02, PNorm = 62.3401, GNorm = 0.6876, lr_0 = 4.4330e-04
Loss = 1.0401e-01, PNorm = 62.3507, GNorm = 0.7717, lr_0 = 4.4299e-04
Loss = 8.7980e-02, PNorm = 62.3578, GNorm = 0.5863, lr_0 = 4.4269e-04
Loss = 9.1531e-02, PNorm = 62.3660, GNorm = 0.5131, lr_0 = 4.4239e-04
Loss = 8.7541e-02, PNorm = 62.3722, GNorm = 0.6624, lr_0 = 4.4209e-04
Loss = 8.8680e-02, PNorm = 62.3819, GNorm = 0.4755, lr_0 = 4.4178e-04
Loss = 1.0082e-01, PNorm = 62.3919, GNorm = 0.8594, lr_0 = 4.4148e-04
Loss = 9.8721e-02, PNorm = 62.4027, GNorm = 0.4997, lr_0 = 4.4118e-04
Loss = 8.9017e-02, PNorm = 62.4139, GNorm = 0.9622, lr_0 = 4.4088e-04
Loss = 9.9919e-02, PNorm = 62.4222, GNorm = 0.8222, lr_0 = 4.4057e-04
Loss = 8.3030e-02, PNorm = 62.4283, GNorm = 0.7911, lr_0 = 4.4027e-04
Loss = 8.8846e-02, PNorm = 62.4359, GNorm = 0.6268, lr_0 = 4.3997e-04
Loss = 9.9040e-02, PNorm = 62.4446, GNorm = 0.8311, lr_0 = 4.3967e-04
Loss = 8.3374e-02, PNorm = 62.4560, GNorm = 1.0871, lr_0 = 4.3937e-04
Validation mae = 0.394857
Epoch 12
Loss = 9.7124e-02, PNorm = 62.4675, GNorm = 1.1584, lr_0 = 4.3907e-04
Loss = 8.0346e-02, PNorm = 62.4771, GNorm = 0.4181, lr_0 = 4.3877e-04
Loss = 7.8467e-02, PNorm = 62.4875, GNorm = 0.5126, lr_0 = 4.3846e-04
Loss = 8.2358e-02, PNorm = 62.4990, GNorm = 0.5695, lr_0 = 4.3816e-04
Loss = 7.5845e-02, PNorm = 62.5101, GNorm = 0.6213, lr_0 = 4.3786e-04
Loss = 6.5152e-02, PNorm = 62.5210, GNorm = 0.5412, lr_0 = 4.3756e-04
Loss = 6.9470e-02, PNorm = 62.5329, GNorm = 0.5432, lr_0 = 4.3726e-04
Loss = 7.0876e-02, PNorm = 62.5449, GNorm = 1.0619, lr_0 = 4.3696e-04
Loss = 7.4890e-02, PNorm = 62.5595, GNorm = 0.6917, lr_0 = 4.3667e-04
Loss = 8.4716e-02, PNorm = 62.5727, GNorm = 1.0512, lr_0 = 4.3637e-04
Loss = 8.2622e-02, PNorm = 62.5827, GNorm = 0.8916, lr_0 = 4.3607e-04
Loss = 7.5078e-02, PNorm = 62.5904, GNorm = 0.5029, lr_0 = 4.3577e-04
Loss = 7.8742e-02, PNorm = 62.6026, GNorm = 0.4811, lr_0 = 4.3547e-04
Loss = 7.8889e-02, PNorm = 62.6177, GNorm = 0.6089, lr_0 = 4.3517e-04
Loss = 8.3687e-02, PNorm = 62.6282, GNorm = 1.0579, lr_0 = 4.3487e-04
Loss = 9.1974e-02, PNorm = 62.6393, GNorm = 0.7197, lr_0 = 4.3458e-04
Loss = 8.3109e-02, PNorm = 62.6518, GNorm = 0.5552, lr_0 = 4.3428e-04
Loss = 7.9430e-02, PNorm = 62.6668, GNorm = 0.7572, lr_0 = 4.3398e-04
Loss = 7.2520e-02, PNorm = 62.6804, GNorm = 0.7603, lr_0 = 4.3368e-04
Loss = 6.8491e-02, PNorm = 62.6915, GNorm = 0.5191, lr_0 = 4.3339e-04
Loss = 8.2843e-02, PNorm = 62.6994, GNorm = 0.4444, lr_0 = 4.3309e-04
Loss = 9.2347e-02, PNorm = 62.7100, GNorm = 0.6648, lr_0 = 4.3279e-04
Loss = 8.3033e-02, PNorm = 62.7218, GNorm = 0.6044, lr_0 = 4.3250e-04
Loss = 7.2022e-02, PNorm = 62.7300, GNorm = 0.6092, lr_0 = 4.3220e-04
Loss = 8.8388e-02, PNorm = 62.7397, GNorm = 0.4995, lr_0 = 4.3190e-04
Loss = 9.0709e-02, PNorm = 62.7492, GNorm = 0.6897, lr_0 = 4.3161e-04
Loss = 8.0580e-02, PNorm = 62.7636, GNorm = 0.4682, lr_0 = 4.3131e-04
Loss = 7.5278e-02, PNorm = 62.7722, GNorm = 0.6788, lr_0 = 4.3102e-04
Loss = 7.9921e-02, PNorm = 62.7794, GNorm = 0.6012, lr_0 = 4.3072e-04
Loss = 7.9695e-02, PNorm = 62.7854, GNorm = 0.9794, lr_0 = 4.3043e-04
Loss = 6.9648e-02, PNorm = 62.7955, GNorm = 0.4078, lr_0 = 4.3013e-04
Loss = 6.6909e-02, PNorm = 62.8050, GNorm = 0.5634, lr_0 = 4.2984e-04
Loss = 9.6309e-02, PNorm = 62.8115, GNorm = 0.6898, lr_0 = 4.2954e-04
Loss = 9.1867e-02, PNorm = 62.8222, GNorm = 0.6778, lr_0 = 4.2925e-04
Loss = 6.4606e-02, PNorm = 62.8374, GNorm = 0.9366, lr_0 = 4.2895e-04
Loss = 7.4193e-02, PNorm = 62.8523, GNorm = 0.5538, lr_0 = 4.2866e-04
Loss = 7.8764e-02, PNorm = 62.8631, GNorm = 0.6352, lr_0 = 4.2837e-04
Loss = 7.9143e-02, PNorm = 62.8749, GNorm = 0.8841, lr_0 = 4.2807e-04
Loss = 7.7403e-02, PNorm = 62.8849, GNorm = 0.8032, lr_0 = 4.2778e-04
Loss = 8.6199e-02, PNorm = 62.8959, GNorm = 0.7082, lr_0 = 4.2749e-04
Loss = 7.4031e-02, PNorm = 62.9045, GNorm = 0.5125, lr_0 = 4.2719e-04
Loss = 7.2199e-02, PNorm = 62.9117, GNorm = 0.7478, lr_0 = 4.2690e-04
Loss = 8.4817e-02, PNorm = 62.9238, GNorm = 0.8239, lr_0 = 4.2661e-04
Loss = 6.5040e-02, PNorm = 62.9378, GNorm = 0.6053, lr_0 = 4.2632e-04
Loss = 8.3296e-02, PNorm = 62.9475, GNorm = 0.5619, lr_0 = 4.2602e-04
Loss = 9.1972e-02, PNorm = 62.9568, GNorm = 0.6283, lr_0 = 4.2573e-04
Loss = 8.1229e-02, PNorm = 62.9655, GNorm = 0.6561, lr_0 = 4.2544e-04
Loss = 8.4679e-02, PNorm = 62.9752, GNorm = 0.4756, lr_0 = 4.2515e-04
Loss = 7.7002e-02, PNorm = 62.9866, GNorm = 0.6082, lr_0 = 4.2486e-04
Loss = 8.5802e-02, PNorm = 62.9965, GNorm = 0.6730, lr_0 = 4.2457e-04
Loss = 7.0050e-02, PNorm = 63.0040, GNorm = 0.6597, lr_0 = 4.2428e-04
Loss = 7.3125e-02, PNorm = 63.0096, GNorm = 0.4668, lr_0 = 4.2399e-04
Loss = 7.8111e-02, PNorm = 63.0179, GNorm = 0.7756, lr_0 = 4.2370e-04
Loss = 7.9913e-02, PNorm = 63.0280, GNorm = 0.6631, lr_0 = 4.2340e-04
Loss = 8.3894e-02, PNorm = 63.0377, GNorm = 0.6546, lr_0 = 4.2311e-04
Loss = 8.6464e-02, PNorm = 63.0442, GNorm = 0.8262, lr_0 = 4.2283e-04
Loss = 8.3411e-02, PNorm = 63.0548, GNorm = 0.5624, lr_0 = 4.2254e-04
Loss = 8.5492e-02, PNorm = 63.0678, GNorm = 0.7954, lr_0 = 4.2225e-04
Loss = 8.3988e-02, PNorm = 63.0751, GNorm = 0.7816, lr_0 = 4.2196e-04
Loss = 8.5694e-02, PNorm = 63.0873, GNorm = 0.7456, lr_0 = 4.2167e-04
Loss = 1.0010e-01, PNorm = 63.1014, GNorm = 0.7095, lr_0 = 4.2138e-04
Loss = 8.5142e-02, PNorm = 63.1134, GNorm = 0.7900, lr_0 = 4.2109e-04
Loss = 8.6408e-02, PNorm = 63.1260, GNorm = 0.7201, lr_0 = 4.2080e-04
Loss = 8.2262e-02, PNorm = 63.1362, GNorm = 0.7589, lr_0 = 4.2051e-04
Loss = 7.7397e-02, PNorm = 63.1472, GNorm = 0.7283, lr_0 = 4.2023e-04
Loss = 8.5558e-02, PNorm = 63.1563, GNorm = 1.3406, lr_0 = 4.1994e-04
Loss = 9.1957e-02, PNorm = 63.1637, GNorm = 0.7596, lr_0 = 4.1965e-04
Loss = 8.1095e-02, PNorm = 63.1730, GNorm = 0.5640, lr_0 = 4.1936e-04
Loss = 9.5934e-02, PNorm = 63.1865, GNorm = 0.6879, lr_0 = 4.1907e-04
Loss = 7.6218e-02, PNorm = 63.2026, GNorm = 0.5814, lr_0 = 4.1879e-04
Loss = 8.7894e-02, PNorm = 63.2096, GNorm = 0.8905, lr_0 = 4.1850e-04
Loss = 6.9891e-02, PNorm = 63.2174, GNorm = 0.6210, lr_0 = 4.1821e-04
Loss = 7.8014e-02, PNorm = 63.2259, GNorm = 0.4859, lr_0 = 4.1793e-04
Loss = 8.2016e-02, PNorm = 63.2338, GNorm = 0.9128, lr_0 = 4.1764e-04
Loss = 7.9370e-02, PNorm = 63.2411, GNorm = 0.6434, lr_0 = 4.1736e-04
Loss = 7.3959e-02, PNorm = 63.2494, GNorm = 0.4774, lr_0 = 4.1707e-04
Loss = 8.8777e-02, PNorm = 63.2590, GNorm = 0.5216, lr_0 = 4.1678e-04
Loss = 8.8215e-02, PNorm = 63.2687, GNorm = 0.6356, lr_0 = 4.1650e-04
Loss = 8.3017e-02, PNorm = 63.2748, GNorm = 0.7036, lr_0 = 4.1621e-04
Loss = 8.0805e-02, PNorm = 63.2827, GNorm = 0.5577, lr_0 = 4.1593e-04
Loss = 8.5663e-02, PNorm = 63.2884, GNorm = 0.6133, lr_0 = 4.1564e-04
Loss = 8.8465e-02, PNorm = 63.2963, GNorm = 0.6081, lr_0 = 4.1536e-04
Loss = 8.8748e-02, PNorm = 63.3062, GNorm = 0.5697, lr_0 = 4.1507e-04
Loss = 8.8006e-02, PNorm = 63.3178, GNorm = 0.8166, lr_0 = 4.1479e-04
Loss = 7.9653e-02, PNorm = 63.3295, GNorm = 0.4908, lr_0 = 4.1450e-04
Loss = 8.6055e-02, PNorm = 63.3395, GNorm = 0.7713, lr_0 = 4.1422e-04
Loss = 8.8632e-02, PNorm = 63.3454, GNorm = 0.6438, lr_0 = 4.1394e-04
Loss = 8.0107e-02, PNorm = 63.3533, GNorm = 0.7625, lr_0 = 4.1365e-04
Loss = 7.3289e-02, PNorm = 63.3635, GNorm = 0.5947, lr_0 = 4.1337e-04
Loss = 8.0489e-02, PNorm = 63.3729, GNorm = 0.6689, lr_0 = 4.1309e-04
Loss = 7.6907e-02, PNorm = 63.3848, GNorm = 0.5856, lr_0 = 4.1280e-04
Loss = 1.0215e-01, PNorm = 63.3973, GNorm = 0.8801, lr_0 = 4.1252e-04
Loss = 7.5456e-02, PNorm = 63.4058, GNorm = 0.5543, lr_0 = 4.1224e-04
Loss = 8.2303e-02, PNorm = 63.4133, GNorm = 1.2641, lr_0 = 4.1196e-04
Loss = 7.7153e-02, PNorm = 63.4190, GNorm = 0.4485, lr_0 = 4.1167e-04
Loss = 7.0363e-02, PNorm = 63.4258, GNorm = 0.6391, lr_0 = 4.1139e-04
Loss = 9.2425e-02, PNorm = 63.4342, GNorm = 0.7855, lr_0 = 4.1111e-04
Loss = 7.2999e-02, PNorm = 63.4469, GNorm = 0.7566, lr_0 = 4.1083e-04
Loss = 8.8124e-02, PNorm = 63.4540, GNorm = 0.4036, lr_0 = 4.1055e-04
Loss = 7.7678e-02, PNorm = 63.4614, GNorm = 1.2241, lr_0 = 4.1027e-04
Loss = 8.1591e-02, PNorm = 63.4685, GNorm = 0.7967, lr_0 = 4.0998e-04
Loss = 7.3551e-02, PNorm = 63.4776, GNorm = 0.5731, lr_0 = 4.0970e-04
Loss = 7.1060e-02, PNorm = 63.4856, GNorm = 0.5777, lr_0 = 4.0942e-04
Loss = 8.1098e-02, PNorm = 63.4932, GNorm = 0.6552, lr_0 = 4.0914e-04
Loss = 8.7304e-02, PNorm = 63.5035, GNorm = 0.6403, lr_0 = 4.0886e-04
Loss = 9.7380e-02, PNorm = 63.5101, GNorm = 0.6956, lr_0 = 4.0858e-04
Loss = 8.3872e-02, PNorm = 63.5205, GNorm = 0.6754, lr_0 = 4.0830e-04
Loss = 8.4031e-02, PNorm = 63.5295, GNorm = 0.5540, lr_0 = 4.0802e-04
Loss = 9.1496e-02, PNorm = 63.5364, GNorm = 0.5648, lr_0 = 4.0774e-04
Loss = 9.1293e-02, PNorm = 63.5482, GNorm = 0.8003, lr_0 = 4.0746e-04
Loss = 8.2168e-02, PNorm = 63.5578, GNorm = 0.5908, lr_0 = 4.0718e-04
Loss = 7.2731e-02, PNorm = 63.5624, GNorm = 0.6894, lr_0 = 4.0691e-04
Loss = 8.9306e-02, PNorm = 63.5699, GNorm = 0.4708, lr_0 = 4.0663e-04
Loss = 8.0964e-02, PNorm = 63.5786, GNorm = 0.4665, lr_0 = 4.0635e-04
Loss = 7.9326e-02, PNorm = 63.5860, GNorm = 1.4301, lr_0 = 4.0607e-04
Loss = 8.3392e-02, PNorm = 63.5991, GNorm = 0.6414, lr_0 = 4.0579e-04
Loss = 7.7591e-02, PNorm = 63.6078, GNorm = 0.4846, lr_0 = 4.0551e-04
Loss = 9.2182e-02, PNorm = 63.6155, GNorm = 0.6327, lr_0 = 4.0524e-04
Loss = 9.3376e-02, PNorm = 63.6252, GNorm = 0.6503, lr_0 = 4.0496e-04
Loss = 8.6944e-02, PNorm = 63.6348, GNorm = 0.5235, lr_0 = 4.0468e-04
Validation mae = 0.398730
Epoch 13
Loss = 7.9526e-02, PNorm = 63.6450, GNorm = 0.8867, lr_0 = 4.0440e-04
Loss = 8.6633e-02, PNorm = 63.6568, GNorm = 0.9724, lr_0 = 4.0413e-04
Loss = 6.8906e-02, PNorm = 63.6670, GNorm = 0.6787, lr_0 = 4.0385e-04
Loss = 7.0038e-02, PNorm = 63.6741, GNorm = 0.6267, lr_0 = 4.0357e-04
Loss = 6.4523e-02, PNorm = 63.6837, GNorm = 0.5276, lr_0 = 4.0330e-04
Loss = 7.0393e-02, PNorm = 63.6945, GNorm = 0.7046, lr_0 = 4.0302e-04
Loss = 6.7600e-02, PNorm = 63.7052, GNorm = 0.6779, lr_0 = 4.0274e-04
Loss = 5.7469e-02, PNorm = 63.7156, GNorm = 0.6659, lr_0 = 4.0247e-04
Loss = 8.6005e-02, PNorm = 63.7282, GNorm = 0.5259, lr_0 = 4.0219e-04
Loss = 7.0733e-02, PNorm = 63.7418, GNorm = 0.5335, lr_0 = 4.0192e-04
Loss = 6.6575e-02, PNorm = 63.7514, GNorm = 0.6982, lr_0 = 4.0164e-04
Loss = 8.1314e-02, PNorm = 63.7585, GNorm = 0.7233, lr_0 = 4.0137e-04
Loss = 7.5601e-02, PNorm = 63.7658, GNorm = 0.6234, lr_0 = 4.0109e-04
Loss = 8.6348e-02, PNorm = 63.7745, GNorm = 0.6901, lr_0 = 4.0082e-04
Loss = 7.0349e-02, PNorm = 63.7860, GNorm = 0.8724, lr_0 = 4.0054e-04
Loss = 7.7050e-02, PNorm = 63.7948, GNorm = 0.6589, lr_0 = 4.0027e-04
Loss = 7.9935e-02, PNorm = 63.8051, GNorm = 0.4622, lr_0 = 3.9999e-04
Loss = 7.5520e-02, PNorm = 63.8160, GNorm = 0.5727, lr_0 = 3.9972e-04
Loss = 8.1566e-02, PNorm = 63.8242, GNorm = 0.4941, lr_0 = 3.9945e-04
Loss = 7.3284e-02, PNorm = 63.8315, GNorm = 0.5532, lr_0 = 3.9917e-04
Loss = 6.8742e-02, PNorm = 63.8428, GNorm = 0.4787, lr_0 = 3.9890e-04
Loss = 7.2360e-02, PNorm = 63.8539, GNorm = 0.6112, lr_0 = 3.9863e-04
Loss = 8.5606e-02, PNorm = 63.8631, GNorm = 0.5453, lr_0 = 3.9835e-04
Loss = 6.3893e-02, PNorm = 63.8713, GNorm = 0.5080, lr_0 = 3.9808e-04
Loss = 7.2036e-02, PNorm = 63.8794, GNorm = 0.7755, lr_0 = 3.9781e-04
Loss = 6.4777e-02, PNorm = 63.8867, GNorm = 0.7024, lr_0 = 3.9753e-04
Loss = 7.8746e-02, PNorm = 63.8965, GNorm = 0.5325, lr_0 = 3.9726e-04
Loss = 8.5822e-02, PNorm = 63.9112, GNorm = 0.7998, lr_0 = 3.9699e-04
Loss = 7.2936e-02, PNorm = 63.9210, GNorm = 0.4904, lr_0 = 3.9672e-04
Loss = 7.2257e-02, PNorm = 63.9295, GNorm = 0.6171, lr_0 = 3.9645e-04
Loss = 7.4003e-02, PNorm = 63.9398, GNorm = 0.5156, lr_0 = 3.9617e-04
Loss = 8.1457e-02, PNorm = 63.9501, GNorm = 0.7419, lr_0 = 3.9590e-04
Loss = 8.0852e-02, PNorm = 63.9587, GNorm = 0.8156, lr_0 = 3.9563e-04
Loss = 7.2228e-02, PNorm = 63.9672, GNorm = 0.6350, lr_0 = 3.9536e-04
Loss = 7.2161e-02, PNorm = 63.9741, GNorm = 0.9117, lr_0 = 3.9509e-04
Loss = 7.6305e-02, PNorm = 63.9796, GNorm = 0.4265, lr_0 = 3.9482e-04
Loss = 7.4310e-02, PNorm = 63.9866, GNorm = 0.6136, lr_0 = 3.9455e-04
Loss = 8.4189e-02, PNorm = 63.9937, GNorm = 0.5229, lr_0 = 3.9428e-04
Loss = 6.7917e-02, PNorm = 64.0035, GNorm = 0.5305, lr_0 = 3.9401e-04
Loss = 7.5617e-02, PNorm = 64.0140, GNorm = 0.4666, lr_0 = 3.9374e-04
Loss = 6.3029e-02, PNorm = 64.0245, GNorm = 0.4662, lr_0 = 3.9347e-04
Loss = 6.9201e-02, PNorm = 64.0334, GNorm = 0.4926, lr_0 = 3.9320e-04
Loss = 6.9941e-02, PNorm = 64.0413, GNorm = 0.4816, lr_0 = 3.9293e-04
Loss = 6.8307e-02, PNorm = 64.0490, GNorm = 0.4598, lr_0 = 3.9266e-04
Loss = 7.5886e-02, PNorm = 64.0581, GNorm = 0.5295, lr_0 = 3.9239e-04
Loss = 8.4682e-02, PNorm = 64.0664, GNorm = 0.6793, lr_0 = 3.9212e-04
Loss = 6.8767e-02, PNorm = 64.0720, GNorm = 0.5427, lr_0 = 3.9185e-04
Loss = 7.4149e-02, PNorm = 64.0792, GNorm = 0.6623, lr_0 = 3.9159e-04
Loss = 7.0038e-02, PNorm = 64.0887, GNorm = 0.5205, lr_0 = 3.9132e-04
Loss = 6.9815e-02, PNorm = 64.0976, GNorm = 0.6672, lr_0 = 3.9105e-04
Loss = 7.7433e-02, PNorm = 64.1079, GNorm = 0.7130, lr_0 = 3.9078e-04
Loss = 7.7340e-02, PNorm = 64.1201, GNorm = 0.6226, lr_0 = 3.9051e-04
Loss = 7.9299e-02, PNorm = 64.1339, GNorm = 0.9405, lr_0 = 3.9025e-04
Loss = 7.6524e-02, PNorm = 64.1430, GNorm = 0.8261, lr_0 = 3.8998e-04
Loss = 7.2920e-02, PNorm = 64.1521, GNorm = 0.8523, lr_0 = 3.8971e-04
Loss = 7.4189e-02, PNorm = 64.1596, GNorm = 0.7327, lr_0 = 3.8945e-04
Loss = 7.6630e-02, PNorm = 64.1639, GNorm = 0.7507, lr_0 = 3.8918e-04
Loss = 7.5370e-02, PNorm = 64.1714, GNorm = 0.7174, lr_0 = 3.8891e-04
Loss = 6.9637e-02, PNorm = 64.1779, GNorm = 0.7623, lr_0 = 3.8865e-04
Loss = 8.9943e-02, PNorm = 64.1836, GNorm = 0.4236, lr_0 = 3.8838e-04
Loss = 7.7105e-02, PNorm = 64.1934, GNorm = 0.4547, lr_0 = 3.8811e-04
Loss = 6.6920e-02, PNorm = 64.2032, GNorm = 0.4304, lr_0 = 3.8785e-04
Loss = 7.5992e-02, PNorm = 64.2108, GNorm = 0.5558, lr_0 = 3.8758e-04
Loss = 8.5630e-02, PNorm = 64.2172, GNorm = 0.4967, lr_0 = 3.8732e-04
Loss = 8.2296e-02, PNorm = 64.2267, GNorm = 0.5216, lr_0 = 3.8705e-04
Loss = 8.4167e-02, PNorm = 64.2384, GNorm = 0.6601, lr_0 = 3.8679e-04
Loss = 8.1031e-02, PNorm = 64.2477, GNorm = 0.9425, lr_0 = 3.8652e-04
Loss = 7.2049e-02, PNorm = 64.2582, GNorm = 0.4576, lr_0 = 3.8626e-04
Loss = 7.6744e-02, PNorm = 64.2658, GNorm = 0.7012, lr_0 = 3.8599e-04
Loss = 7.3240e-02, PNorm = 64.2737, GNorm = 0.6200, lr_0 = 3.8573e-04
Loss = 7.5980e-02, PNorm = 64.2814, GNorm = 0.6243, lr_0 = 3.8546e-04
Loss = 7.0540e-02, PNorm = 64.2903, GNorm = 0.4818, lr_0 = 3.8520e-04
Loss = 7.8879e-02, PNorm = 64.2982, GNorm = 0.4896, lr_0 = 3.8493e-04
Loss = 8.3544e-02, PNorm = 64.3049, GNorm = 0.6586, lr_0 = 3.8467e-04
Loss = 7.4781e-02, PNorm = 64.3152, GNorm = 0.8026, lr_0 = 3.8441e-04
Loss = 7.8288e-02, PNorm = 64.3268, GNorm = 0.5002, lr_0 = 3.8414e-04
Loss = 8.7070e-02, PNorm = 64.3353, GNorm = 0.6382, lr_0 = 3.8388e-04
Loss = 7.2181e-02, PNorm = 64.3466, GNorm = 0.6206, lr_0 = 3.8362e-04
Loss = 6.2963e-02, PNorm = 64.3566, GNorm = 0.5371, lr_0 = 3.8336e-04
Loss = 6.7487e-02, PNorm = 64.3648, GNorm = 0.4001, lr_0 = 3.8309e-04
Loss = 7.9405e-02, PNorm = 64.3727, GNorm = 0.4451, lr_0 = 3.8283e-04
Loss = 7.4631e-02, PNorm = 64.3792, GNorm = 0.7122, lr_0 = 3.8257e-04
Loss = 7.0895e-02, PNorm = 64.3872, GNorm = 0.4371, lr_0 = 3.8231e-04
Loss = 7.5480e-02, PNorm = 64.3950, GNorm = 1.0822, lr_0 = 3.8204e-04
Loss = 8.7272e-02, PNorm = 64.4010, GNorm = 0.6393, lr_0 = 3.8178e-04
Loss = 7.7407e-02, PNorm = 64.4102, GNorm = 0.5531, lr_0 = 3.8152e-04
Loss = 7.5406e-02, PNorm = 64.4181, GNorm = 0.4354, lr_0 = 3.8126e-04
Loss = 7.5205e-02, PNorm = 64.4265, GNorm = 0.5822, lr_0 = 3.8100e-04
Loss = 7.8052e-02, PNorm = 64.4352, GNorm = 0.6786, lr_0 = 3.8074e-04
Loss = 7.5546e-02, PNorm = 64.4436, GNorm = 0.6072, lr_0 = 3.8048e-04
Loss = 6.9004e-02, PNorm = 64.4512, GNorm = 0.4842, lr_0 = 3.8022e-04
Loss = 7.5825e-02, PNorm = 64.4562, GNorm = 0.5511, lr_0 = 3.7995e-04
Loss = 7.8144e-02, PNorm = 64.4608, GNorm = 0.5494, lr_0 = 3.7969e-04
Loss = 7.8720e-02, PNorm = 64.4679, GNorm = 0.4943, lr_0 = 3.7943e-04
Loss = 9.0970e-02, PNorm = 64.4748, GNorm = 0.5123, lr_0 = 3.7917e-04
Loss = 7.8538e-02, PNorm = 64.4802, GNorm = 0.7079, lr_0 = 3.7891e-04
Loss = 8.5476e-02, PNorm = 64.4863, GNorm = 1.1230, lr_0 = 3.7866e-04
Loss = 7.4596e-02, PNorm = 64.4919, GNorm = 0.8081, lr_0 = 3.7840e-04
Loss = 8.3852e-02, PNorm = 64.5010, GNorm = 1.7380, lr_0 = 3.7814e-04
Loss = 9.0761e-02, PNorm = 64.5088, GNorm = 0.9136, lr_0 = 3.7788e-04
Loss = 7.1524e-02, PNorm = 64.5167, GNorm = 1.1089, lr_0 = 3.7762e-04
Loss = 7.4006e-02, PNorm = 64.5230, GNorm = 0.5114, lr_0 = 3.7736e-04
Loss = 7.5812e-02, PNorm = 64.5257, GNorm = 0.5627, lr_0 = 3.7710e-04
Loss = 8.4136e-02, PNorm = 64.5345, GNorm = 0.4912, lr_0 = 3.7684e-04
Loss = 8.1334e-02, PNorm = 64.5471, GNorm = 0.6573, lr_0 = 3.7659e-04
Loss = 8.8888e-02, PNorm = 64.5579, GNorm = 0.7836, lr_0 = 3.7633e-04
Loss = 7.8802e-02, PNorm = 64.5631, GNorm = 0.7819, lr_0 = 3.7607e-04
Loss = 8.0109e-02, PNorm = 64.5699, GNorm = 0.4888, lr_0 = 3.7581e-04
Loss = 6.6966e-02, PNorm = 64.5749, GNorm = 0.6148, lr_0 = 3.7555e-04
Loss = 8.5466e-02, PNorm = 64.5818, GNorm = 0.8289, lr_0 = 3.7530e-04
Loss = 7.4775e-02, PNorm = 64.5887, GNorm = 0.5746, lr_0 = 3.7504e-04
Loss = 8.4014e-02, PNorm = 64.5936, GNorm = 0.5673, lr_0 = 3.7478e-04
Loss = 7.5818e-02, PNorm = 64.5995, GNorm = 0.6466, lr_0 = 3.7453e-04
Loss = 7.6270e-02, PNorm = 64.6099, GNorm = 0.8746, lr_0 = 3.7427e-04
Loss = 8.0563e-02, PNorm = 64.6192, GNorm = 0.8173, lr_0 = 3.7401e-04
Loss = 7.2061e-02, PNorm = 64.6284, GNorm = 0.4073, lr_0 = 3.7376e-04
Loss = 7.6745e-02, PNorm = 64.6361, GNorm = 0.8298, lr_0 = 3.7350e-04
Loss = 7.6048e-02, PNorm = 64.6445, GNorm = 0.9855, lr_0 = 3.7325e-04
Loss = 7.7409e-02, PNorm = 64.6511, GNorm = 0.5574, lr_0 = 3.7299e-04
Loss = 7.4071e-02, PNorm = 64.6576, GNorm = 0.5779, lr_0 = 3.7273e-04
Validation mae = 0.393046
Epoch 14
Loss = 5.9825e-02, PNorm = 64.6629, GNorm = 0.4920, lr_0 = 3.7248e-04
Loss = 6.5869e-02, PNorm = 64.6712, GNorm = 0.8753, lr_0 = 3.7222e-04
Loss = 6.2330e-02, PNorm = 64.6805, GNorm = 0.8116, lr_0 = 3.7197e-04
Loss = 6.2318e-02, PNorm = 64.6900, GNorm = 1.3921, lr_0 = 3.7171e-04
Loss = 7.3121e-02, PNorm = 64.7004, GNorm = 0.7935, lr_0 = 3.7146e-04
Loss = 7.6862e-02, PNorm = 64.7102, GNorm = 0.5800, lr_0 = 3.7120e-04
Loss = 6.5169e-02, PNorm = 64.7180, GNorm = 0.6427, lr_0 = 3.7095e-04
Loss = 6.7236e-02, PNorm = 64.7277, GNorm = 0.6897, lr_0 = 3.7070e-04
Loss = 6.3121e-02, PNorm = 64.7363, GNorm = 1.0376, lr_0 = 3.7044e-04
Loss = 6.5688e-02, PNorm = 64.7442, GNorm = 0.5779, lr_0 = 3.7019e-04
Loss = 7.9250e-02, PNorm = 64.7511, GNorm = 0.8122, lr_0 = 3.6993e-04
Loss = 6.8847e-02, PNorm = 64.7616, GNorm = 0.4879, lr_0 = 3.6968e-04
Loss = 6.0072e-02, PNorm = 64.7713, GNorm = 0.4568, lr_0 = 3.6943e-04
Loss = 6.6320e-02, PNorm = 64.7770, GNorm = 0.4691, lr_0 = 3.6917e-04
Loss = 7.2183e-02, PNorm = 64.7833, GNorm = 0.6430, lr_0 = 3.6892e-04
Loss = 5.9932e-02, PNorm = 64.7896, GNorm = 0.5817, lr_0 = 3.6867e-04
Loss = 6.6368e-02, PNorm = 64.7983, GNorm = 0.4364, lr_0 = 3.6842e-04
Loss = 7.2162e-02, PNorm = 64.8086, GNorm = 0.6702, lr_0 = 3.6816e-04
Loss = 7.1286e-02, PNorm = 64.8175, GNorm = 0.7384, lr_0 = 3.6791e-04
Loss = 5.8845e-02, PNorm = 64.8240, GNorm = 0.8149, lr_0 = 3.6766e-04
Loss = 6.3440e-02, PNorm = 64.8277, GNorm = 0.7679, lr_0 = 3.6741e-04
Loss = 7.4098e-02, PNorm = 64.8374, GNorm = 0.5171, lr_0 = 3.6716e-04
Loss = 6.7723e-02, PNorm = 64.8484, GNorm = 0.5415, lr_0 = 3.6690e-04
Loss = 6.9237e-02, PNorm = 64.8541, GNorm = 0.5919, lr_0 = 3.6665e-04
Loss = 7.6463e-02, PNorm = 64.8632, GNorm = 0.6313, lr_0 = 3.6640e-04
Loss = 6.3852e-02, PNorm = 64.8744, GNorm = 0.8588, lr_0 = 3.6615e-04
Loss = 6.9181e-02, PNorm = 64.8830, GNorm = 0.6204, lr_0 = 3.6590e-04
Loss = 7.1689e-02, PNorm = 64.8930, GNorm = 0.6465, lr_0 = 3.6565e-04
Loss = 6.3389e-02, PNorm = 64.9034, GNorm = 0.7842, lr_0 = 3.6540e-04
Loss = 5.9476e-02, PNorm = 64.9108, GNorm = 0.7602, lr_0 = 3.6515e-04
Loss = 7.4885e-02, PNorm = 64.9182, GNorm = 0.7068, lr_0 = 3.6490e-04
Loss = 7.5529e-02, PNorm = 64.9244, GNorm = 1.0993, lr_0 = 3.6465e-04
Loss = 5.2985e-02, PNorm = 64.9312, GNorm = 0.5165, lr_0 = 3.6440e-04
Loss = 6.5954e-02, PNorm = 64.9376, GNorm = 0.8969, lr_0 = 3.6415e-04
Loss = 6.8001e-02, PNorm = 64.9456, GNorm = 0.5033, lr_0 = 3.6390e-04
Loss = 7.1640e-02, PNorm = 64.9535, GNorm = 0.3735, lr_0 = 3.6365e-04
Loss = 7.1490e-02, PNorm = 64.9616, GNorm = 0.5551, lr_0 = 3.6340e-04
Loss = 7.1999e-02, PNorm = 64.9680, GNorm = 0.8225, lr_0 = 3.6315e-04
Loss = 6.3998e-02, PNorm = 64.9755, GNorm = 0.6229, lr_0 = 3.6290e-04
Loss = 7.4142e-02, PNorm = 64.9857, GNorm = 0.4926, lr_0 = 3.6266e-04
Loss = 7.0661e-02, PNorm = 64.9934, GNorm = 0.5099, lr_0 = 3.6241e-04
Loss = 7.6881e-02, PNorm = 65.0026, GNorm = 0.5337, lr_0 = 3.6216e-04
Loss = 6.5281e-02, PNorm = 65.0098, GNorm = 0.5589, lr_0 = 3.6191e-04
Loss = 7.3453e-02, PNorm = 65.0181, GNorm = 0.5995, lr_0 = 3.6166e-04
Loss = 6.1882e-02, PNorm = 65.0238, GNorm = 0.9973, lr_0 = 3.6141e-04
Loss = 6.8459e-02, PNorm = 65.0328, GNorm = 0.4780, lr_0 = 3.6117e-04
Loss = 7.6481e-02, PNorm = 65.0435, GNorm = 0.6010, lr_0 = 3.6092e-04
Loss = 7.3259e-02, PNorm = 65.0539, GNorm = 0.7439, lr_0 = 3.6067e-04
Loss = 7.2067e-02, PNorm = 65.0625, GNorm = 0.6812, lr_0 = 3.6043e-04
Loss = 6.5982e-02, PNorm = 65.0693, GNorm = 0.4595, lr_0 = 3.6018e-04
Loss = 6.2520e-02, PNorm = 65.0777, GNorm = 0.8175, lr_0 = 3.5993e-04
Loss = 7.0667e-02, PNorm = 65.0874, GNorm = 0.6138, lr_0 = 3.5969e-04
Loss = 7.9479e-02, PNorm = 65.0966, GNorm = 0.9534, lr_0 = 3.5944e-04
Loss = 8.5890e-02, PNorm = 65.1063, GNorm = 0.4739, lr_0 = 3.5919e-04
Loss = 7.8494e-02, PNorm = 65.1145, GNorm = 0.5660, lr_0 = 3.5895e-04
Loss = 6.5982e-02, PNorm = 65.1216, GNorm = 0.8353, lr_0 = 3.5870e-04
Loss = 6.8927e-02, PNorm = 65.1264, GNorm = 0.5285, lr_0 = 3.5845e-04
Loss = 6.1226e-02, PNorm = 65.1322, GNorm = 0.6311, lr_0 = 3.5821e-04
Loss = 6.5932e-02, PNorm = 65.1367, GNorm = 0.5745, lr_0 = 3.5796e-04
Loss = 6.9798e-02, PNorm = 65.1408, GNorm = 0.7052, lr_0 = 3.5772e-04
Loss = 8.0118e-02, PNorm = 65.1473, GNorm = 0.5904, lr_0 = 3.5747e-04
Loss = 6.6862e-02, PNorm = 65.1562, GNorm = 0.6442, lr_0 = 3.5723e-04
Loss = 6.6591e-02, PNorm = 65.1630, GNorm = 0.9995, lr_0 = 3.5698e-04
Loss = 6.5902e-02, PNorm = 65.1686, GNorm = 0.5028, lr_0 = 3.5674e-04
Loss = 8.2112e-02, PNorm = 65.1787, GNorm = 1.2708, lr_0 = 3.5650e-04
Loss = 6.3258e-02, PNorm = 65.1874, GNorm = 0.4678, lr_0 = 3.5625e-04
Loss = 6.8442e-02, PNorm = 65.1943, GNorm = 0.4825, lr_0 = 3.5601e-04
Loss = 6.3245e-02, PNorm = 65.2002, GNorm = 0.4164, lr_0 = 3.5576e-04
Loss = 7.8877e-02, PNorm = 65.2098, GNorm = 0.9342, lr_0 = 3.5552e-04
Loss = 7.2376e-02, PNorm = 65.2215, GNorm = 0.4884, lr_0 = 3.5528e-04
Loss = 6.1024e-02, PNorm = 65.2302, GNorm = 0.6668, lr_0 = 3.5503e-04
Loss = 7.0430e-02, PNorm = 65.2370, GNorm = 0.5719, lr_0 = 3.5479e-04
Loss = 9.2650e-02, PNorm = 65.2455, GNorm = 0.7103, lr_0 = 3.5455e-04
Loss = 6.9605e-02, PNorm = 65.2540, GNorm = 0.9039, lr_0 = 3.5430e-04
Loss = 6.4225e-02, PNorm = 65.2607, GNorm = 0.4192, lr_0 = 3.5406e-04
Loss = 7.7924e-02, PNorm = 65.2693, GNorm = 0.7003, lr_0 = 3.5382e-04
Loss = 7.0149e-02, PNorm = 65.2760, GNorm = 0.7257, lr_0 = 3.5358e-04
Loss = 7.8684e-02, PNorm = 65.2817, GNorm = 0.6781, lr_0 = 3.5333e-04
Loss = 6.9479e-02, PNorm = 65.2878, GNorm = 1.0347, lr_0 = 3.5309e-04
Loss = 7.1169e-02, PNorm = 65.2949, GNorm = 0.9638, lr_0 = 3.5285e-04
Loss = 6.4544e-02, PNorm = 65.3017, GNorm = 0.4257, lr_0 = 3.5261e-04
Loss = 7.0826e-02, PNorm = 65.3048, GNorm = 0.5382, lr_0 = 3.5237e-04
Loss = 6.3858e-02, PNorm = 65.3097, GNorm = 0.5094, lr_0 = 3.5212e-04
Loss = 6.5813e-02, PNorm = 65.3166, GNorm = 0.6273, lr_0 = 3.5188e-04
Loss = 6.8340e-02, PNorm = 65.3227, GNorm = 0.5308, lr_0 = 3.5164e-04
Loss = 6.8032e-02, PNorm = 65.3293, GNorm = 0.5856, lr_0 = 3.5140e-04
Loss = 8.0404e-02, PNorm = 65.3360, GNorm = 0.4757, lr_0 = 3.5116e-04
Loss = 7.8985e-02, PNorm = 65.3448, GNorm = 0.4778, lr_0 = 3.5092e-04
Loss = 7.1477e-02, PNorm = 65.3517, GNorm = 1.0570, lr_0 = 3.5068e-04
Loss = 6.7500e-02, PNorm = 65.3570, GNorm = 0.5792, lr_0 = 3.5044e-04
Loss = 6.7236e-02, PNorm = 65.3640, GNorm = 0.5280, lr_0 = 3.5020e-04
Loss = 8.9416e-02, PNorm = 65.3719, GNorm = 0.7125, lr_0 = 3.4996e-04
Loss = 6.4419e-02, PNorm = 65.3780, GNorm = 0.8678, lr_0 = 3.4972e-04
Loss = 7.8481e-02, PNorm = 65.3867, GNorm = 0.6855, lr_0 = 3.4948e-04
Loss = 6.9619e-02, PNorm = 65.3939, GNorm = 0.8659, lr_0 = 3.4924e-04
Loss = 6.7666e-02, PNorm = 65.3976, GNorm = 0.5414, lr_0 = 3.4900e-04
Loss = 7.5319e-02, PNorm = 65.4035, GNorm = 0.6843, lr_0 = 3.4876e-04
Loss = 8.1228e-02, PNorm = 65.4111, GNorm = 0.7692, lr_0 = 3.4852e-04
Loss = 7.7997e-02, PNorm = 65.4165, GNorm = 0.5772, lr_0 = 3.4828e-04
Loss = 8.0559e-02, PNorm = 65.4228, GNorm = 0.5718, lr_0 = 3.4805e-04
Loss = 8.0605e-02, PNorm = 65.4310, GNorm = 0.8125, lr_0 = 3.4781e-04
Loss = 6.6674e-02, PNorm = 65.4390, GNorm = 0.9994, lr_0 = 3.4757e-04
Loss = 8.2510e-02, PNorm = 65.4471, GNorm = 0.6646, lr_0 = 3.4733e-04
Loss = 7.3520e-02, PNorm = 65.4560, GNorm = 0.5678, lr_0 = 3.4709e-04
Loss = 6.7510e-02, PNorm = 65.4645, GNorm = 0.7677, lr_0 = 3.4686e-04
Loss = 7.6737e-02, PNorm = 65.4717, GNorm = 0.4950, lr_0 = 3.4662e-04
Loss = 8.0414e-02, PNorm = 65.4770, GNorm = 0.5948, lr_0 = 3.4638e-04
Loss = 8.1232e-02, PNorm = 65.4820, GNorm = 0.4879, lr_0 = 3.4614e-04
Loss = 7.3636e-02, PNorm = 65.4892, GNorm = 0.7610, lr_0 = 3.4591e-04
Loss = 7.3830e-02, PNorm = 65.4943, GNorm = 0.9141, lr_0 = 3.4567e-04
Loss = 7.1017e-02, PNorm = 65.4983, GNorm = 0.5441, lr_0 = 3.4543e-04
Loss = 7.2816e-02, PNorm = 65.5031, GNorm = 0.8095, lr_0 = 3.4520e-04
Loss = 7.8011e-02, PNorm = 65.5113, GNorm = 0.3997, lr_0 = 3.4496e-04
Loss = 6.0197e-02, PNorm = 65.5178, GNorm = 0.9006, lr_0 = 3.4472e-04
Loss = 6.9864e-02, PNorm = 65.5254, GNorm = 0.6011, lr_0 = 3.4449e-04
Loss = 7.3473e-02, PNorm = 65.5329, GNorm = 0.6230, lr_0 = 3.4425e-04
Loss = 7.4592e-02, PNorm = 65.5398, GNorm = 0.5654, lr_0 = 3.4402e-04
Loss = 7.9901e-02, PNorm = 65.5454, GNorm = 0.6106, lr_0 = 3.4378e-04
Loss = 7.4639e-02, PNorm = 65.5511, GNorm = 0.6651, lr_0 = 3.4354e-04
Loss = 6.9811e-02, PNorm = 65.5594, GNorm = 0.8256, lr_0 = 3.4331e-04
Validation mae = 0.406940
Epoch 15
Loss = 7.5914e-02, PNorm = 65.5714, GNorm = 0.6378, lr_0 = 3.4307e-04
Loss = 6.9167e-02, PNorm = 65.5855, GNorm = 0.4248, lr_0 = 3.4284e-04
Loss = 6.5898e-02, PNorm = 65.5934, GNorm = 0.7018, lr_0 = 3.4260e-04
Loss = 5.5934e-02, PNorm = 65.5977, GNorm = 0.5960, lr_0 = 3.4237e-04
Loss = 6.0072e-02, PNorm = 65.6049, GNorm = 0.8423, lr_0 = 3.4213e-04
Loss = 6.3811e-02, PNorm = 65.6129, GNorm = 0.8824, lr_0 = 3.4190e-04
Loss = 6.0727e-02, PNorm = 65.6227, GNorm = 0.9080, lr_0 = 3.4167e-04
Loss = 5.7092e-02, PNorm = 65.6345, GNorm = 0.3947, lr_0 = 3.4143e-04
Loss = 6.2947e-02, PNorm = 65.6414, GNorm = 0.6943, lr_0 = 3.4120e-04
Loss = 7.0778e-02, PNorm = 65.6473, GNorm = 1.0092, lr_0 = 3.4096e-04
Loss = 6.4038e-02, PNorm = 65.6536, GNorm = 0.7515, lr_0 = 3.4073e-04
Loss = 8.0527e-02, PNorm = 65.6607, GNorm = 0.8732, lr_0 = 3.4050e-04
Loss = 6.9491e-02, PNorm = 65.6680, GNorm = 0.5748, lr_0 = 3.4026e-04
Loss = 6.3449e-02, PNorm = 65.6753, GNorm = 1.1212, lr_0 = 3.4003e-04
Loss = 6.6187e-02, PNorm = 65.6823, GNorm = 0.6238, lr_0 = 3.3980e-04
Loss = 6.3492e-02, PNorm = 65.6909, GNorm = 0.5044, lr_0 = 3.3956e-04
Loss = 7.0427e-02, PNorm = 65.7004, GNorm = 0.6006, lr_0 = 3.3933e-04
Loss = 6.5935e-02, PNorm = 65.7083, GNorm = 1.0298, lr_0 = 3.3910e-04
Loss = 5.9066e-02, PNorm = 65.7152, GNorm = 0.5715, lr_0 = 3.3887e-04
Loss = 6.2179e-02, PNorm = 65.7219, GNorm = 0.5346, lr_0 = 3.3864e-04
Loss = 6.3757e-02, PNorm = 65.7286, GNorm = 0.5903, lr_0 = 3.3840e-04
Loss = 6.3272e-02, PNorm = 65.7364, GNorm = 0.5917, lr_0 = 3.3817e-04
Loss = 6.7453e-02, PNorm = 65.7419, GNorm = 0.3983, lr_0 = 3.3794e-04
Loss = 6.5212e-02, PNorm = 65.7487, GNorm = 0.5003, lr_0 = 3.3771e-04
Loss = 6.2172e-02, PNorm = 65.7577, GNorm = 0.6117, lr_0 = 3.3748e-04
Loss = 6.5775e-02, PNorm = 65.7669, GNorm = 0.4782, lr_0 = 3.3725e-04
Loss = 7.0419e-02, PNorm = 65.7747, GNorm = 0.6256, lr_0 = 3.3701e-04
Loss = 5.7724e-02, PNorm = 65.7821, GNorm = 0.5428, lr_0 = 3.3678e-04
Loss = 6.3251e-02, PNorm = 65.7903, GNorm = 0.4544, lr_0 = 3.3655e-04
Loss = 5.3918e-02, PNorm = 65.7989, GNorm = 0.8772, lr_0 = 3.3632e-04
Loss = 6.7839e-02, PNorm = 65.8067, GNorm = 0.5745, lr_0 = 3.3609e-04
Loss = 5.7027e-02, PNorm = 65.8113, GNorm = 0.8944, lr_0 = 3.3586e-04
Loss = 5.7502e-02, PNorm = 65.8174, GNorm = 0.6579, lr_0 = 3.3563e-04
Loss = 6.6174e-02, PNorm = 65.8242, GNorm = 0.6600, lr_0 = 3.3540e-04
Loss = 6.7281e-02, PNorm = 65.8314, GNorm = 0.8335, lr_0 = 3.3517e-04
Loss = 5.6585e-02, PNorm = 65.8379, GNorm = 0.5158, lr_0 = 3.3494e-04
Loss = 6.6261e-02, PNorm = 65.8425, GNorm = 0.6136, lr_0 = 3.3471e-04
Loss = 6.1943e-02, PNorm = 65.8466, GNorm = 0.5456, lr_0 = 3.3448e-04
Loss = 5.7069e-02, PNorm = 65.8529, GNorm = 0.5732, lr_0 = 3.3425e-04
Loss = 6.4249e-02, PNorm = 65.8593, GNorm = 0.5896, lr_0 = 3.3403e-04
Loss = 6.7595e-02, PNorm = 65.8642, GNorm = 0.9379, lr_0 = 3.3380e-04
Loss = 7.1510e-02, PNorm = 65.8702, GNorm = 0.5754, lr_0 = 3.3357e-04
Loss = 6.6939e-02, PNorm = 65.8779, GNorm = 0.5101, lr_0 = 3.3334e-04
Loss = 6.4776e-02, PNorm = 65.8868, GNorm = 0.5882, lr_0 = 3.3311e-04
Loss = 7.1582e-02, PNorm = 65.8932, GNorm = 0.4645, lr_0 = 3.3288e-04
Loss = 6.4735e-02, PNorm = 65.8993, GNorm = 1.0348, lr_0 = 3.3265e-04
Loss = 7.2522e-02, PNorm = 65.9088, GNorm = 0.9190, lr_0 = 3.3243e-04
Loss = 6.7657e-02, PNorm = 65.9172, GNorm = 0.5480, lr_0 = 3.3220e-04
Loss = 6.1588e-02, PNorm = 65.9237, GNorm = 0.6159, lr_0 = 3.3197e-04
Loss = 6.4161e-02, PNorm = 65.9286, GNorm = 0.5909, lr_0 = 3.3174e-04
Loss = 5.7936e-02, PNorm = 65.9351, GNorm = 0.4453, lr_0 = 3.3152e-04
Loss = 6.9338e-02, PNorm = 65.9443, GNorm = 1.1148, lr_0 = 3.3129e-04
Loss = 6.3618e-02, PNorm = 65.9515, GNorm = 0.5007, lr_0 = 3.3106e-04
Loss = 6.4548e-02, PNorm = 65.9567, GNorm = 0.4422, lr_0 = 3.3084e-04
Loss = 6.9598e-02, PNorm = 65.9625, GNorm = 1.0267, lr_0 = 3.3061e-04
Loss = 6.5787e-02, PNorm = 65.9688, GNorm = 0.6074, lr_0 = 3.3038e-04
Loss = 6.3734e-02, PNorm = 65.9754, GNorm = 0.9864, lr_0 = 3.3016e-04
Loss = 6.5141e-02, PNorm = 65.9823, GNorm = 0.5710, lr_0 = 3.2993e-04
Loss = 6.4659e-02, PNorm = 65.9913, GNorm = 0.6812, lr_0 = 3.2970e-04
Loss = 5.5294e-02, PNorm = 65.9985, GNorm = 0.6686, lr_0 = 3.2948e-04
Loss = 6.5684e-02, PNorm = 66.0051, GNorm = 1.1648, lr_0 = 3.2925e-04
Loss = 5.3268e-02, PNorm = 66.0131, GNorm = 0.4020, lr_0 = 3.2903e-04
Loss = 6.7224e-02, PNorm = 66.0205, GNorm = 0.4116, lr_0 = 3.2880e-04
Loss = 6.2604e-02, PNorm = 66.0260, GNorm = 0.5196, lr_0 = 3.2858e-04
Loss = 7.7514e-02, PNorm = 66.0322, GNorm = 0.5256, lr_0 = 3.2835e-04
Loss = 7.0422e-02, PNorm = 66.0381, GNorm = 0.6605, lr_0 = 3.2813e-04
Loss = 7.0863e-02, PNorm = 66.0474, GNorm = 0.7756, lr_0 = 3.2790e-04
Loss = 6.5334e-02, PNorm = 66.0553, GNorm = 0.5410, lr_0 = 3.2768e-04
Loss = 6.8211e-02, PNorm = 66.0622, GNorm = 0.3859, lr_0 = 3.2745e-04
Loss = 6.3799e-02, PNorm = 66.0684, GNorm = 0.5241, lr_0 = 3.2723e-04
Loss = 6.2655e-02, PNorm = 66.0743, GNorm = 0.7346, lr_0 = 3.2700e-04
Loss = 6.3316e-02, PNorm = 66.0784, GNorm = 0.5317, lr_0 = 3.2678e-04
Loss = 6.0049e-02, PNorm = 66.0821, GNorm = 0.7184, lr_0 = 3.2656e-04
Loss = 6.2741e-02, PNorm = 66.0856, GNorm = 0.6439, lr_0 = 3.2633e-04
Loss = 6.3762e-02, PNorm = 66.0911, GNorm = 0.5593, lr_0 = 3.2611e-04
Loss = 7.3655e-02, PNorm = 66.0961, GNorm = 0.4369, lr_0 = 3.2589e-04
Loss = 6.8322e-02, PNorm = 66.1008, GNorm = 0.4980, lr_0 = 3.2566e-04
Loss = 6.5738e-02, PNorm = 66.1061, GNorm = 0.6293, lr_0 = 3.2544e-04
Loss = 6.3249e-02, PNorm = 66.1130, GNorm = 0.8321, lr_0 = 3.2522e-04
Loss = 6.8664e-02, PNorm = 66.1161, GNorm = 0.7732, lr_0 = 3.2499e-04
Loss = 5.8339e-02, PNorm = 66.1208, GNorm = 0.7682, lr_0 = 3.2477e-04
Loss = 6.6729e-02, PNorm = 66.1259, GNorm = 0.9220, lr_0 = 3.2455e-04
Loss = 6.5571e-02, PNorm = 66.1310, GNorm = 0.8785, lr_0 = 3.2433e-04
Loss = 7.2230e-02, PNorm = 66.1383, GNorm = 0.5948, lr_0 = 3.2410e-04
Loss = 6.2890e-02, PNorm = 66.1436, GNorm = 0.5730, lr_0 = 3.2388e-04
Loss = 5.3014e-02, PNorm = 66.1498, GNorm = 0.4629, lr_0 = 3.2366e-04
Loss = 6.1196e-02, PNorm = 66.1560, GNorm = 0.4866, lr_0 = 3.2344e-04
Loss = 6.1251e-02, PNorm = 66.1626, GNorm = 0.8472, lr_0 = 3.2322e-04
Loss = 6.1139e-02, PNorm = 66.1675, GNorm = 0.4973, lr_0 = 3.2300e-04
Loss = 6.3111e-02, PNorm = 66.1755, GNorm = 0.5305, lr_0 = 3.2277e-04
Loss = 6.5763e-02, PNorm = 66.1830, GNorm = 0.8307, lr_0 = 3.2255e-04
Loss = 8.1093e-02, PNorm = 66.1943, GNorm = 1.2473, lr_0 = 3.2233e-04
Loss = 7.1607e-02, PNorm = 66.2037, GNorm = 0.7167, lr_0 = 3.2211e-04
Loss = 6.1960e-02, PNorm = 66.2093, GNorm = 0.8769, lr_0 = 3.2189e-04
Loss = 7.1996e-02, PNorm = 66.2142, GNorm = 0.6430, lr_0 = 3.2167e-04
Loss = 7.3824e-02, PNorm = 66.2211, GNorm = 0.9172, lr_0 = 3.2145e-04
Loss = 6.8140e-02, PNorm = 66.2276, GNorm = 0.9190, lr_0 = 3.2123e-04
Loss = 6.6572e-02, PNorm = 66.2351, GNorm = 0.8067, lr_0 = 3.2101e-04
Loss = 7.4597e-02, PNorm = 66.2393, GNorm = 0.9207, lr_0 = 3.2079e-04
Loss = 6.7370e-02, PNorm = 66.2448, GNorm = 0.5024, lr_0 = 3.2057e-04
Loss = 6.2410e-02, PNorm = 66.2512, GNorm = 0.5540, lr_0 = 3.2035e-04
Loss = 6.6337e-02, PNorm = 66.2556, GNorm = 0.6599, lr_0 = 3.2013e-04
Loss = 7.3773e-02, PNorm = 66.2614, GNorm = 0.6396, lr_0 = 3.1991e-04
Loss = 6.8435e-02, PNorm = 66.2673, GNorm = 0.5634, lr_0 = 3.1969e-04
Loss = 5.8560e-02, PNorm = 66.2744, GNorm = 0.5918, lr_0 = 3.1947e-04
Loss = 5.7779e-02, PNorm = 66.2772, GNorm = 0.7562, lr_0 = 3.1925e-04
Loss = 7.3940e-02, PNorm = 66.2820, GNorm = 0.5348, lr_0 = 3.1904e-04
Loss = 6.6380e-02, PNorm = 66.2906, GNorm = 0.5527, lr_0 = 3.1882e-04
Loss = 6.2066e-02, PNorm = 66.2959, GNorm = 0.8238, lr_0 = 3.1860e-04
Loss = 7.2685e-02, PNorm = 66.3022, GNorm = 1.1470, lr_0 = 3.1838e-04
Loss = 6.3322e-02, PNorm = 66.3081, GNorm = 0.7108, lr_0 = 3.1816e-04
Loss = 6.6702e-02, PNorm = 66.3156, GNorm = 0.8627, lr_0 = 3.1794e-04
Loss = 7.7330e-02, PNorm = 66.3200, GNorm = 1.1935, lr_0 = 3.1773e-04
Loss = 6.5065e-02, PNorm = 66.3264, GNorm = 0.5280, lr_0 = 3.1751e-04
Loss = 6.2355e-02, PNorm = 66.3306, GNorm = 0.4525, lr_0 = 3.1729e-04
Loss = 6.2371e-02, PNorm = 66.3351, GNorm = 0.5294, lr_0 = 3.1707e-04
Loss = 7.1563e-02, PNorm = 66.3428, GNorm = 0.8837, lr_0 = 3.1686e-04
Loss = 6.9562e-02, PNorm = 66.3513, GNorm = 0.5684, lr_0 = 3.1664e-04
Loss = 6.4063e-02, PNorm = 66.3599, GNorm = 0.4618, lr_0 = 3.1642e-04
Loss = 7.3938e-02, PNorm = 66.3703, GNorm = 0.6685, lr_0 = 3.1621e-04
Validation mae = 0.390500
Epoch 16
Loss = 5.5247e-02, PNorm = 66.3779, GNorm = 0.7823, lr_0 = 3.1599e-04
Loss = 6.4513e-02, PNorm = 66.3860, GNorm = 0.6536, lr_0 = 3.1577e-04
Loss = 5.8683e-02, PNorm = 66.3936, GNorm = 0.6344, lr_0 = 3.1556e-04
Loss = 5.6518e-02, PNorm = 66.3983, GNorm = 0.5274, lr_0 = 3.1534e-04
Loss = 5.8765e-02, PNorm = 66.4018, GNorm = 0.4929, lr_0 = 3.1512e-04
Loss = 5.3225e-02, PNorm = 66.4076, GNorm = 1.0129, lr_0 = 3.1491e-04
Loss = 6.3752e-02, PNorm = 66.4145, GNorm = 0.4921, lr_0 = 3.1469e-04
Loss = 6.5824e-02, PNorm = 66.4220, GNorm = 0.7796, lr_0 = 3.1448e-04
Loss = 6.5121e-02, PNorm = 66.4289, GNorm = 0.4482, lr_0 = 3.1426e-04
Loss = 6.3928e-02, PNorm = 66.4349, GNorm = 0.5030, lr_0 = 3.1405e-04
Loss = 5.0860e-02, PNorm = 66.4428, GNorm = 0.7463, lr_0 = 3.1383e-04
Loss = 5.5279e-02, PNorm = 66.4446, GNorm = 0.4763, lr_0 = 3.1362e-04
Loss = 5.7791e-02, PNorm = 66.4484, GNorm = 0.4685, lr_0 = 3.1340e-04
Loss = 5.9691e-02, PNorm = 66.4530, GNorm = 0.6085, lr_0 = 3.1319e-04
Loss = 6.2224e-02, PNorm = 66.4622, GNorm = 0.6685, lr_0 = 3.1297e-04
Loss = 5.9596e-02, PNorm = 66.4702, GNorm = 0.5960, lr_0 = 3.1276e-04
Loss = 5.5536e-02, PNorm = 66.4744, GNorm = 0.5589, lr_0 = 3.1254e-04
Loss = 5.7465e-02, PNorm = 66.4813, GNorm = 0.6126, lr_0 = 3.1233e-04
Loss = 5.8813e-02, PNorm = 66.4886, GNorm = 0.5607, lr_0 = 3.1212e-04
Loss = 5.8171e-02, PNorm = 66.4949, GNorm = 0.6022, lr_0 = 3.1190e-04
Loss = 5.7734e-02, PNorm = 66.4992, GNorm = 0.8098, lr_0 = 3.1169e-04
Loss = 5.9836e-02, PNorm = 66.5067, GNorm = 0.6352, lr_0 = 3.1147e-04
Loss = 6.6681e-02, PNorm = 66.5158, GNorm = 1.2445, lr_0 = 3.1126e-04
Loss = 5.5641e-02, PNorm = 66.5250, GNorm = 0.5631, lr_0 = 3.1105e-04
Loss = 5.5993e-02, PNorm = 66.5323, GNorm = 0.5848, lr_0 = 3.1083e-04
Loss = 5.2010e-02, PNorm = 66.5400, GNorm = 0.5756, lr_0 = 3.1062e-04
Loss = 5.9826e-02, PNorm = 66.5475, GNorm = 0.4829, lr_0 = 3.1041e-04
Loss = 6.2211e-02, PNorm = 66.5548, GNorm = 0.5412, lr_0 = 3.1020e-04
Loss = 5.4221e-02, PNorm = 66.5618, GNorm = 0.6577, lr_0 = 3.0998e-04
Loss = 5.6073e-02, PNorm = 66.5653, GNorm = 0.5343, lr_0 = 3.0977e-04
Loss = 5.9008e-02, PNorm = 66.5685, GNorm = 0.6717, lr_0 = 3.0956e-04
Loss = 6.0435e-02, PNorm = 66.5738, GNorm = 0.7265, lr_0 = 3.0935e-04
Loss = 5.5848e-02, PNorm = 66.5775, GNorm = 0.4637, lr_0 = 3.0914e-04
Loss = 6.2163e-02, PNorm = 66.5837, GNorm = 0.6861, lr_0 = 3.0892e-04
Loss = 6.1339e-02, PNorm = 66.5902, GNorm = 0.5398, lr_0 = 3.0871e-04
Loss = 5.5061e-02, PNorm = 66.5930, GNorm = 0.4677, lr_0 = 3.0850e-04
Loss = 6.2493e-02, PNorm = 66.5980, GNorm = 0.5882, lr_0 = 3.0829e-04
Loss = 6.9008e-02, PNorm = 66.6054, GNorm = 0.6895, lr_0 = 3.0808e-04
Loss = 6.0312e-02, PNorm = 66.6114, GNorm = 0.6391, lr_0 = 3.0787e-04
Loss = 6.8140e-02, PNorm = 66.6172, GNorm = 0.6032, lr_0 = 3.0766e-04
Loss = 6.9127e-02, PNorm = 66.6210, GNorm = 0.5356, lr_0 = 3.0745e-04
Loss = 6.8319e-02, PNorm = 66.6229, GNorm = 0.5245, lr_0 = 3.0723e-04
Loss = 6.3854e-02, PNorm = 66.6277, GNorm = 0.7502, lr_0 = 3.0702e-04
Loss = 5.7928e-02, PNorm = 66.6351, GNorm = 0.4902, lr_0 = 3.0681e-04
Loss = 5.9770e-02, PNorm = 66.6429, GNorm = 0.3895, lr_0 = 3.0660e-04
Loss = 6.7273e-02, PNorm = 66.6489, GNorm = 0.7339, lr_0 = 3.0639e-04
Loss = 6.6038e-02, PNorm = 66.6554, GNorm = 0.4768, lr_0 = 3.0618e-04
Loss = 5.5345e-02, PNorm = 66.6615, GNorm = 0.5767, lr_0 = 3.0597e-04
Loss = 5.8414e-02, PNorm = 66.6648, GNorm = 0.6324, lr_0 = 3.0576e-04
Loss = 6.3166e-02, PNorm = 66.6707, GNorm = 0.5324, lr_0 = 3.0555e-04
Loss = 5.3975e-02, PNorm = 66.6773, GNorm = 0.4758, lr_0 = 3.0535e-04
Loss = 5.6413e-02, PNorm = 66.6841, GNorm = 0.4980, lr_0 = 3.0514e-04
Loss = 5.8445e-02, PNorm = 66.6922, GNorm = 0.7053, lr_0 = 3.0493e-04
Loss = 6.1534e-02, PNorm = 66.7007, GNorm = 0.5722, lr_0 = 3.0472e-04
Loss = 5.3787e-02, PNorm = 66.7058, GNorm = 0.4598, lr_0 = 3.0451e-04
Loss = 5.5865e-02, PNorm = 66.7107, GNorm = 0.4423, lr_0 = 3.0430e-04
Loss = 5.9693e-02, PNorm = 66.7154, GNorm = 0.5083, lr_0 = 3.0409e-04
Loss = 5.9036e-02, PNorm = 66.7179, GNorm = 0.6431, lr_0 = 3.0388e-04
Loss = 6.8079e-02, PNorm = 66.7227, GNorm = 0.5469, lr_0 = 3.0368e-04
Loss = 6.2271e-02, PNorm = 66.7304, GNorm = 0.5420, lr_0 = 3.0347e-04
Loss = 5.5216e-02, PNorm = 66.7371, GNorm = 0.6070, lr_0 = 3.0326e-04
Loss = 5.9518e-02, PNorm = 66.7456, GNorm = 0.5292, lr_0 = 3.0305e-04
Loss = 5.9263e-02, PNorm = 66.7515, GNorm = 0.5312, lr_0 = 3.0284e-04
Loss = 5.8171e-02, PNorm = 66.7587, GNorm = 0.5361, lr_0 = 3.0264e-04
Loss = 6.3525e-02, PNorm = 66.7653, GNorm = 0.6113, lr_0 = 3.0243e-04
Loss = 6.4410e-02, PNorm = 66.7711, GNorm = 0.7591, lr_0 = 3.0222e-04
Loss = 6.0421e-02, PNorm = 66.7777, GNorm = 0.4485, lr_0 = 3.0202e-04
Loss = 7.4412e-02, PNorm = 66.7828, GNorm = 0.5493, lr_0 = 3.0181e-04
Loss = 6.2727e-02, PNorm = 66.7885, GNorm = 0.8491, lr_0 = 3.0160e-04
Loss = 6.0734e-02, PNorm = 66.7941, GNorm = 0.5934, lr_0 = 3.0140e-04
Loss = 6.7888e-02, PNorm = 66.7998, GNorm = 0.8209, lr_0 = 3.0119e-04
Loss = 6.6698e-02, PNorm = 66.8031, GNorm = 0.4435, lr_0 = 3.0098e-04
Loss = 6.0523e-02, PNorm = 66.8091, GNorm = 0.6570, lr_0 = 3.0078e-04
Loss = 6.7684e-02, PNorm = 66.8174, GNorm = 0.9485, lr_0 = 3.0057e-04
Loss = 6.0218e-02, PNorm = 66.8250, GNorm = 0.6911, lr_0 = 3.0036e-04
Loss = 5.9014e-02, PNorm = 66.8304, GNorm = 0.8275, lr_0 = 3.0016e-04
Loss = 6.9287e-02, PNorm = 66.8367, GNorm = 0.6132, lr_0 = 2.9995e-04
Loss = 6.8412e-02, PNorm = 66.8454, GNorm = 0.6065, lr_0 = 2.9975e-04
Loss = 5.9691e-02, PNorm = 66.8514, GNorm = 0.5639, lr_0 = 2.9954e-04
Loss = 6.2067e-02, PNorm = 66.8571, GNorm = 0.4710, lr_0 = 2.9934e-04
Loss = 5.9916e-02, PNorm = 66.8633, GNorm = 0.9726, lr_0 = 2.9913e-04
Loss = 5.8718e-02, PNorm = 66.8687, GNorm = 0.8210, lr_0 = 2.9893e-04
Loss = 6.9974e-02, PNorm = 66.8741, GNorm = 0.5038, lr_0 = 2.9872e-04
Loss = 6.4496e-02, PNorm = 66.8797, GNorm = 0.4921, lr_0 = 2.9852e-04
Loss = 5.1937e-02, PNorm = 66.8869, GNorm = 0.3804, lr_0 = 2.9831e-04
Loss = 5.6244e-02, PNorm = 66.8917, GNorm = 0.4222, lr_0 = 2.9811e-04
Loss = 6.8063e-02, PNorm = 66.8978, GNorm = 0.4363, lr_0 = 2.9790e-04
Loss = 5.6846e-02, PNorm = 66.9029, GNorm = 0.5863, lr_0 = 2.9770e-04
Loss = 5.8930e-02, PNorm = 66.9090, GNorm = 0.7255, lr_0 = 2.9750e-04
Loss = 5.8561e-02, PNorm = 66.9108, GNorm = 0.5210, lr_0 = 2.9729e-04
Loss = 7.2503e-02, PNorm = 66.9155, GNorm = 0.7643, lr_0 = 2.9709e-04
Loss = 5.5041e-02, PNorm = 66.9209, GNorm = 0.4664, lr_0 = 2.9689e-04
Loss = 7.3669e-02, PNorm = 66.9264, GNorm = 0.6592, lr_0 = 2.9668e-04
Loss = 5.4666e-02, PNorm = 66.9319, GNorm = 0.4391, lr_0 = 2.9648e-04
Loss = 6.5781e-02, PNorm = 66.9375, GNorm = 0.5470, lr_0 = 2.9628e-04
Loss = 6.4766e-02, PNorm = 66.9432, GNorm = 0.5706, lr_0 = 2.9607e-04
Loss = 6.2784e-02, PNorm = 66.9475, GNorm = 0.4941, lr_0 = 2.9587e-04
Loss = 6.7413e-02, PNorm = 66.9533, GNorm = 0.5529, lr_0 = 2.9567e-04
Loss = 5.0376e-02, PNorm = 66.9586, GNorm = 0.4996, lr_0 = 2.9546e-04
Loss = 5.6929e-02, PNorm = 66.9639, GNorm = 0.5496, lr_0 = 2.9526e-04
Loss = 5.9237e-02, PNorm = 66.9698, GNorm = 0.5810, lr_0 = 2.9506e-04
Loss = 6.2593e-02, PNorm = 66.9747, GNorm = 0.7274, lr_0 = 2.9486e-04
Loss = 6.1927e-02, PNorm = 66.9782, GNorm = 0.6213, lr_0 = 2.9466e-04
Loss = 7.2405e-02, PNorm = 66.9816, GNorm = 0.6124, lr_0 = 2.9445e-04
Loss = 7.1229e-02, PNorm = 66.9858, GNorm = 1.0991, lr_0 = 2.9425e-04
Loss = 6.6950e-02, PNorm = 66.9919, GNorm = 0.4294, lr_0 = 2.9405e-04
Loss = 7.2707e-02, PNorm = 67.0001, GNorm = 0.5532, lr_0 = 2.9385e-04
Loss = 5.3471e-02, PNorm = 67.0070, GNorm = 0.6172, lr_0 = 2.9365e-04
Loss = 6.1922e-02, PNorm = 67.0092, GNorm = 0.8929, lr_0 = 2.9345e-04
Loss = 6.4945e-02, PNorm = 67.0125, GNorm = 0.5927, lr_0 = 2.9325e-04
Loss = 7.2116e-02, PNorm = 67.0200, GNorm = 1.0168, lr_0 = 2.9305e-04
Loss = 6.2629e-02, PNorm = 67.0253, GNorm = 0.5268, lr_0 = 2.9284e-04
Loss = 7.4060e-02, PNorm = 67.0300, GNorm = 0.5933, lr_0 = 2.9264e-04
Loss = 6.3644e-02, PNorm = 67.0337, GNorm = 0.6164, lr_0 = 2.9244e-04
Loss = 5.8284e-02, PNorm = 67.0386, GNorm = 0.6439, lr_0 = 2.9224e-04
Loss = 6.0651e-02, PNorm = 67.0457, GNorm = 1.4271, lr_0 = 2.9204e-04
Loss = 7.1139e-02, PNorm = 67.0487, GNorm = 0.7721, lr_0 = 2.9184e-04
Loss = 7.0445e-02, PNorm = 67.0530, GNorm = 0.7657, lr_0 = 2.9164e-04
Loss = 6.3564e-02, PNorm = 67.0581, GNorm = 0.6711, lr_0 = 2.9144e-04
Loss = 6.0319e-02, PNorm = 67.0639, GNorm = 0.5900, lr_0 = 2.9124e-04
Validation mae = 0.385709
Epoch 17
Loss = 4.8854e-02, PNorm = 67.0700, GNorm = 0.4359, lr_0 = 2.9104e-04
Loss = 5.2960e-02, PNorm = 67.0770, GNorm = 1.0367, lr_0 = 2.9084e-04
Loss = 5.6935e-02, PNorm = 67.0844, GNorm = 0.7434, lr_0 = 2.9065e-04
Loss = 5.1066e-02, PNorm = 67.0910, GNorm = 0.6835, lr_0 = 2.9045e-04
Loss = 5.4108e-02, PNorm = 67.0976, GNorm = 0.4786, lr_0 = 2.9025e-04
Loss = 5.3676e-02, PNorm = 67.1036, GNorm = 0.5905, lr_0 = 2.9005e-04
Loss = 6.3466e-02, PNorm = 67.1115, GNorm = 0.5236, lr_0 = 2.8985e-04
Loss = 5.6484e-02, PNorm = 67.1193, GNorm = 0.5378, lr_0 = 2.8965e-04
Loss = 5.3029e-02, PNorm = 67.1254, GNorm = 0.3850, lr_0 = 2.8945e-04
Loss = 5.3440e-02, PNorm = 67.1317, GNorm = 0.4217, lr_0 = 2.8925e-04
Loss = 5.2848e-02, PNorm = 67.1374, GNorm = 0.6555, lr_0 = 2.8906e-04
Loss = 5.7799e-02, PNorm = 67.1436, GNorm = 1.2316, lr_0 = 2.8886e-04
Loss = 5.5507e-02, PNorm = 67.1500, GNorm = 0.6406, lr_0 = 2.8866e-04
Loss = 5.4724e-02, PNorm = 67.1536, GNorm = 0.4102, lr_0 = 2.8846e-04
Loss = 6.4993e-02, PNorm = 67.1586, GNorm = 1.1548, lr_0 = 2.8826e-04
Loss = 6.3968e-02, PNorm = 67.1649, GNorm = 0.4742, lr_0 = 2.8807e-04
Loss = 5.4964e-02, PNorm = 67.1701, GNorm = 0.5780, lr_0 = 2.8787e-04
Loss = 5.7847e-02, PNorm = 67.1729, GNorm = 0.5217, lr_0 = 2.8767e-04
Loss = 6.0657e-02, PNorm = 67.1776, GNorm = 0.8613, lr_0 = 2.8748e-04
Loss = 6.5850e-02, PNorm = 67.1843, GNorm = 0.7494, lr_0 = 2.8728e-04
Loss = 5.1155e-02, PNorm = 67.1879, GNorm = 0.5418, lr_0 = 2.8708e-04
Loss = 5.0144e-02, PNorm = 67.1925, GNorm = 0.9594, lr_0 = 2.8689e-04
Loss = 5.4328e-02, PNorm = 67.2011, GNorm = 0.5883, lr_0 = 2.8669e-04
Loss = 6.1362e-02, PNorm = 67.2104, GNorm = 0.7404, lr_0 = 2.8649e-04
Loss = 5.7513e-02, PNorm = 67.2184, GNorm = 0.6698, lr_0 = 2.8630e-04
Loss = 6.0817e-02, PNorm = 67.2237, GNorm = 0.5908, lr_0 = 2.8610e-04
Loss = 5.1706e-02, PNorm = 67.2281, GNorm = 0.8035, lr_0 = 2.8590e-04
Loss = 5.7992e-02, PNorm = 67.2344, GNorm = 0.4144, lr_0 = 2.8571e-04
Loss = 5.0325e-02, PNorm = 67.2412, GNorm = 0.4403, lr_0 = 2.8551e-04
Loss = 5.7892e-02, PNorm = 67.2470, GNorm = 0.9315, lr_0 = 2.8532e-04
Loss = 5.5467e-02, PNorm = 67.2506, GNorm = 0.6160, lr_0 = 2.8512e-04
Loss = 5.1982e-02, PNorm = 67.2580, GNorm = 0.3738, lr_0 = 2.8493e-04
Loss = 5.7108e-02, PNorm = 67.2659, GNorm = 0.4158, lr_0 = 2.8473e-04
Loss = 6.0404e-02, PNorm = 67.2718, GNorm = 0.6492, lr_0 = 2.8454e-04
Loss = 6.6146e-02, PNorm = 67.2773, GNorm = 0.6632, lr_0 = 2.8434e-04
Loss = 5.8053e-02, PNorm = 67.2838, GNorm = 0.6588, lr_0 = 2.8415e-04
Loss = 4.8450e-02, PNorm = 67.2897, GNorm = 0.4038, lr_0 = 2.8395e-04
Loss = 6.5103e-02, PNorm = 67.2931, GNorm = 0.6433, lr_0 = 2.8376e-04
Loss = 5.8424e-02, PNorm = 67.2964, GNorm = 0.5170, lr_0 = 2.8356e-04
Loss = 4.6002e-02, PNorm = 67.2988, GNorm = 0.5261, lr_0 = 2.8337e-04
Loss = 6.0991e-02, PNorm = 67.3022, GNorm = 0.4497, lr_0 = 2.8317e-04
Loss = 5.9457e-02, PNorm = 67.3064, GNorm = 0.5102, lr_0 = 2.8298e-04
Loss = 5.3286e-02, PNorm = 67.3126, GNorm = 0.4004, lr_0 = 2.8279e-04
Loss = 5.4456e-02, PNorm = 67.3182, GNorm = 0.7089, lr_0 = 2.8259e-04
Loss = 6.0513e-02, PNorm = 67.3215, GNorm = 0.4225, lr_0 = 2.8240e-04
Loss = 6.1759e-02, PNorm = 67.3268, GNorm = 0.5598, lr_0 = 2.8221e-04
Loss = 5.7274e-02, PNorm = 67.3334, GNorm = 0.4964, lr_0 = 2.8201e-04
Loss = 5.8354e-02, PNorm = 67.3382, GNorm = 0.6176, lr_0 = 2.8182e-04
Loss = 4.7899e-02, PNorm = 67.3429, GNorm = 0.7090, lr_0 = 2.8163e-04
Loss = 5.6630e-02, PNorm = 67.3486, GNorm = 0.6204, lr_0 = 2.8143e-04
Loss = 5.4265e-02, PNorm = 67.3544, GNorm = 0.6805, lr_0 = 2.8124e-04
Loss = 5.4299e-02, PNorm = 67.3581, GNorm = 0.6315, lr_0 = 2.8105e-04
Loss = 5.3691e-02, PNorm = 67.3599, GNorm = 0.5174, lr_0 = 2.8085e-04
Loss = 5.5347e-02, PNorm = 67.3657, GNorm = 0.5793, lr_0 = 2.8066e-04
Loss = 6.3911e-02, PNorm = 67.3738, GNorm = 0.6660, lr_0 = 2.8047e-04
Loss = 6.3097e-02, PNorm = 67.3829, GNorm = 0.6183, lr_0 = 2.8028e-04
Loss = 6.0654e-02, PNorm = 67.3891, GNorm = 0.4826, lr_0 = 2.8009e-04
Loss = 5.4777e-02, PNorm = 67.3923, GNorm = 0.8499, lr_0 = 2.7989e-04
Loss = 5.3388e-02, PNorm = 67.3952, GNorm = 0.4642, lr_0 = 2.7970e-04
Loss = 5.3878e-02, PNorm = 67.3992, GNorm = 0.4486, lr_0 = 2.7951e-04
Loss = 6.1190e-02, PNorm = 67.4054, GNorm = 0.6373, lr_0 = 2.7932e-04
Loss = 7.0530e-02, PNorm = 67.4106, GNorm = 0.8178, lr_0 = 2.7913e-04
Loss = 6.2539e-02, PNorm = 67.4148, GNorm = 0.4888, lr_0 = 2.7894e-04
Loss = 5.9810e-02, PNorm = 67.4191, GNorm = 0.5276, lr_0 = 2.7875e-04
Loss = 5.9538e-02, PNorm = 67.4245, GNorm = 0.4361, lr_0 = 2.7855e-04
Loss = 5.6962e-02, PNorm = 67.4286, GNorm = 0.4715, lr_0 = 2.7836e-04
Loss = 5.1058e-02, PNorm = 67.4344, GNorm = 0.5942, lr_0 = 2.7817e-04
Loss = 6.9103e-02, PNorm = 67.4422, GNorm = 0.4685, lr_0 = 2.7798e-04
Loss = 5.9477e-02, PNorm = 67.4504, GNorm = 0.6200, lr_0 = 2.7779e-04
Loss = 6.8854e-02, PNorm = 67.4560, GNorm = 0.6984, lr_0 = 2.7760e-04
Loss = 5.5387e-02, PNorm = 67.4603, GNorm = 0.5178, lr_0 = 2.7741e-04
Loss = 6.2640e-02, PNorm = 67.4643, GNorm = 0.5247, lr_0 = 2.7722e-04
Loss = 5.3593e-02, PNorm = 67.4688, GNorm = 0.5365, lr_0 = 2.7703e-04
Loss = 5.8986e-02, PNorm = 67.4747, GNorm = 0.5547, lr_0 = 2.7684e-04
Loss = 5.9297e-02, PNorm = 67.4823, GNorm = 0.5806, lr_0 = 2.7665e-04
Loss = 5.5727e-02, PNorm = 67.4889, GNorm = 0.4223, lr_0 = 2.7646e-04
Loss = 6.0832e-02, PNorm = 67.4946, GNorm = 0.8667, lr_0 = 2.7627e-04
Loss = 5.3709e-02, PNorm = 67.5005, GNorm = 0.6886, lr_0 = 2.7608e-04
Loss = 5.6294e-02, PNorm = 67.5046, GNorm = 0.4673, lr_0 = 2.7590e-04
Loss = 5.3631e-02, PNorm = 67.5065, GNorm = 0.6155, lr_0 = 2.7571e-04
Loss = 6.1842e-02, PNorm = 67.5119, GNorm = 0.5520, lr_0 = 2.7552e-04
Loss = 6.4608e-02, PNorm = 67.5178, GNorm = 0.6776, lr_0 = 2.7533e-04
Loss = 4.9588e-02, PNorm = 67.5228, GNorm = 0.5819, lr_0 = 2.7514e-04
Loss = 5.6313e-02, PNorm = 67.5270, GNorm = 0.5367, lr_0 = 2.7495e-04
Loss = 5.6912e-02, PNorm = 67.5306, GNorm = 0.5986, lr_0 = 2.7476e-04
Loss = 5.6357e-02, PNorm = 67.5356, GNorm = 0.6249, lr_0 = 2.7457e-04
Loss = 5.7713e-02, PNorm = 67.5404, GNorm = 0.7254, lr_0 = 2.7439e-04
Loss = 5.4074e-02, PNorm = 67.5458, GNorm = 0.5973, lr_0 = 2.7420e-04
Loss = 4.6000e-02, PNorm = 67.5514, GNorm = 0.5823, lr_0 = 2.7401e-04
Loss = 5.3617e-02, PNorm = 67.5558, GNorm = 0.3996, lr_0 = 2.7382e-04
Loss = 5.8970e-02, PNorm = 67.5584, GNorm = 0.6673, lr_0 = 2.7364e-04
Loss = 5.2962e-02, PNorm = 67.5616, GNorm = 0.5685, lr_0 = 2.7345e-04
Loss = 5.6148e-02, PNorm = 67.5647, GNorm = 0.7073, lr_0 = 2.7326e-04
Loss = 5.7973e-02, PNorm = 67.5674, GNorm = 0.6184, lr_0 = 2.7307e-04
Loss = 6.0190e-02, PNorm = 67.5733, GNorm = 0.7515, lr_0 = 2.7289e-04
Loss = 5.2818e-02, PNorm = 67.5790, GNorm = 0.4981, lr_0 = 2.7270e-04
Loss = 5.1637e-02, PNorm = 67.5831, GNorm = 0.5703, lr_0 = 2.7251e-04
Loss = 4.7159e-02, PNorm = 67.5850, GNorm = 0.4465, lr_0 = 2.7233e-04
Loss = 6.1090e-02, PNorm = 67.5888, GNorm = 0.6885, lr_0 = 2.7214e-04
Loss = 6.4063e-02, PNorm = 67.5933, GNorm = 0.4986, lr_0 = 2.7195e-04
Loss = 5.4141e-02, PNorm = 67.5986, GNorm = 0.5515, lr_0 = 2.7177e-04
Loss = 5.6820e-02, PNorm = 67.6039, GNorm = 0.6229, lr_0 = 2.7158e-04
Loss = 6.1493e-02, PNorm = 67.6075, GNorm = 0.4231, lr_0 = 2.7139e-04
Loss = 5.4021e-02, PNorm = 67.6118, GNorm = 0.6127, lr_0 = 2.7121e-04
Loss = 6.3247e-02, PNorm = 67.6133, GNorm = 0.6505, lr_0 = 2.7102e-04
Loss = 6.1593e-02, PNorm = 67.6168, GNorm = 0.5953, lr_0 = 2.7084e-04
Loss = 6.0023e-02, PNorm = 67.6235, GNorm = 0.5703, lr_0 = 2.7065e-04
Loss = 5.6184e-02, PNorm = 67.6307, GNorm = 0.5004, lr_0 = 2.7047e-04
Loss = 6.4173e-02, PNorm = 67.6348, GNorm = 0.5905, lr_0 = 2.7028e-04
Loss = 4.9599e-02, PNorm = 67.6387, GNorm = 0.3346, lr_0 = 2.7010e-04
Loss = 6.2826e-02, PNorm = 67.6446, GNorm = 0.6433, lr_0 = 2.6991e-04
Loss = 6.8234e-02, PNorm = 67.6507, GNorm = 0.5672, lr_0 = 2.6973e-04
Loss = 6.2985e-02, PNorm = 67.6551, GNorm = 0.6163, lr_0 = 2.6954e-04
Loss = 6.6431e-02, PNorm = 67.6600, GNorm = 0.5102, lr_0 = 2.6936e-04
Loss = 5.9878e-02, PNorm = 67.6635, GNorm = 0.5141, lr_0 = 2.6917e-04
Loss = 5.9118e-02, PNorm = 67.6685, GNorm = 0.8846, lr_0 = 2.6899e-04
Loss = 6.1210e-02, PNorm = 67.6748, GNorm = 0.4846, lr_0 = 2.6880e-04
Loss = 5.7198e-02, PNorm = 67.6789, GNorm = 0.5768, lr_0 = 2.6862e-04
Loss = 6.3953e-02, PNorm = 67.6827, GNorm = 0.5176, lr_0 = 2.6844e-04
Loss = 5.8745e-02, PNorm = 67.6870, GNorm = 0.8521, lr_0 = 2.6825e-04
Validation mae = 0.386521
Epoch 18
Loss = 4.6444e-02, PNorm = 67.6930, GNorm = 0.7617, lr_0 = 2.6807e-04
Loss = 4.5165e-02, PNorm = 67.6983, GNorm = 0.3808, lr_0 = 2.6788e-04
Loss = 4.6039e-02, PNorm = 67.7028, GNorm = 0.4599, lr_0 = 2.6770e-04
Loss = 4.8721e-02, PNorm = 67.7060, GNorm = 0.6878, lr_0 = 2.6752e-04
Loss = 5.0582e-02, PNorm = 67.7115, GNorm = 0.4333, lr_0 = 2.6733e-04
Loss = 4.8841e-02, PNorm = 67.7159, GNorm = 0.4879, lr_0 = 2.6715e-04
Loss = 4.6876e-02, PNorm = 67.7197, GNorm = 0.5942, lr_0 = 2.6697e-04
Loss = 4.8287e-02, PNorm = 67.7247, GNorm = 0.4967, lr_0 = 2.6678e-04
Loss = 5.0614e-02, PNorm = 67.7313, GNorm = 0.4973, lr_0 = 2.6660e-04
Loss = 3.9993e-02, PNorm = 67.7360, GNorm = 0.8326, lr_0 = 2.6642e-04
Loss = 5.5000e-02, PNorm = 67.7400, GNorm = 0.5882, lr_0 = 2.6624e-04
Loss = 5.4559e-02, PNorm = 67.7461, GNorm = 0.5101, lr_0 = 2.6605e-04
Loss = 4.5089e-02, PNorm = 67.7494, GNorm = 0.4932, lr_0 = 2.6587e-04
Loss = 4.6259e-02, PNorm = 67.7544, GNorm = 0.6340, lr_0 = 2.6569e-04
Loss = 5.9509e-02, PNorm = 67.7588, GNorm = 0.4683, lr_0 = 2.6551e-04
Loss = 5.5581e-02, PNorm = 67.7632, GNorm = 0.6798, lr_0 = 2.6533e-04
Loss = 6.0737e-02, PNorm = 67.7692, GNorm = 0.5954, lr_0 = 2.6514e-04
Loss = 5.0392e-02, PNorm = 67.7751, GNorm = 0.4636, lr_0 = 2.6496e-04
Loss = 5.1405e-02, PNorm = 67.7804, GNorm = 0.4086, lr_0 = 2.6478e-04
Loss = 5.2768e-02, PNorm = 67.7861, GNorm = 0.6866, lr_0 = 2.6460e-04
Loss = 4.8982e-02, PNorm = 67.7898, GNorm = 0.5316, lr_0 = 2.6442e-04
Loss = 4.7789e-02, PNorm = 67.7935, GNorm = 0.6739, lr_0 = 2.6424e-04
Loss = 5.4660e-02, PNorm = 67.7979, GNorm = 0.5158, lr_0 = 2.6406e-04
Loss = 4.7106e-02, PNorm = 67.8031, GNorm = 0.6480, lr_0 = 2.6388e-04
Loss = 5.0809e-02, PNorm = 67.8062, GNorm = 0.8180, lr_0 = 2.6369e-04
Loss = 5.0337e-02, PNorm = 67.8110, GNorm = 0.6962, lr_0 = 2.6351e-04
Loss = 5.8143e-02, PNorm = 67.8149, GNorm = 0.5093, lr_0 = 2.6333e-04
Loss = 4.8538e-02, PNorm = 67.8205, GNorm = 0.4070, lr_0 = 2.6315e-04
Loss = 4.0142e-02, PNorm = 67.8236, GNorm = 0.8442, lr_0 = 2.6297e-04
Loss = 4.4766e-02, PNorm = 67.8250, GNorm = 0.3301, lr_0 = 2.6279e-04
Loss = 5.5432e-02, PNorm = 67.8262, GNorm = 0.5345, lr_0 = 2.6261e-04
Loss = 6.3135e-02, PNorm = 67.8315, GNorm = 0.7337, lr_0 = 2.6243e-04
Loss = 5.2906e-02, PNorm = 67.8388, GNorm = 0.6457, lr_0 = 2.6225e-04
Loss = 4.9699e-02, PNorm = 67.8459, GNorm = 0.6459, lr_0 = 2.6207e-04
Loss = 5.1025e-02, PNorm = 67.8515, GNorm = 0.6231, lr_0 = 2.6189e-04
Loss = 4.6823e-02, PNorm = 67.8582, GNorm = 0.6474, lr_0 = 2.6171e-04
Loss = 5.2605e-02, PNorm = 67.8656, GNorm = 0.6779, lr_0 = 2.6153e-04
Loss = 5.4278e-02, PNorm = 67.8709, GNorm = 0.6010, lr_0 = 2.6136e-04
Loss = 5.4375e-02, PNorm = 67.8772, GNorm = 0.6222, lr_0 = 2.6118e-04
Loss = 4.8111e-02, PNorm = 67.8827, GNorm = 0.7137, lr_0 = 2.6100e-04
Loss = 5.8190e-02, PNorm = 67.8856, GNorm = 0.5760, lr_0 = 2.6082e-04
Loss = 5.9204e-02, PNorm = 67.8909, GNorm = 0.6193, lr_0 = 2.6064e-04
Loss = 5.2022e-02, PNorm = 67.8960, GNorm = 0.5798, lr_0 = 2.6046e-04
Loss = 5.2861e-02, PNorm = 67.9033, GNorm = 0.5112, lr_0 = 2.6028e-04
Loss = 5.0848e-02, PNorm = 67.9102, GNorm = 0.5733, lr_0 = 2.6011e-04
Loss = 6.0731e-02, PNorm = 67.9134, GNorm = 0.4891, lr_0 = 2.5993e-04
Loss = 5.6048e-02, PNorm = 67.9166, GNorm = 0.7799, lr_0 = 2.5975e-04
Loss = 5.6074e-02, PNorm = 67.9196, GNorm = 0.6073, lr_0 = 2.5957e-04
Loss = 5.5305e-02, PNorm = 67.9242, GNorm = 1.0451, lr_0 = 2.5939e-04
Loss = 4.9050e-02, PNorm = 67.9299, GNorm = 0.3322, lr_0 = 2.5922e-04
Loss = 5.4420e-02, PNorm = 67.9324, GNorm = 0.4859, lr_0 = 2.5904e-04
Loss = 5.1991e-02, PNorm = 67.9387, GNorm = 0.5980, lr_0 = 2.5886e-04
Loss = 5.0699e-02, PNorm = 67.9437, GNorm = 0.4806, lr_0 = 2.5868e-04
Loss = 5.8469e-02, PNorm = 67.9470, GNorm = 0.5067, lr_0 = 2.5851e-04
Loss = 4.8308e-02, PNorm = 67.9505, GNorm = 0.4512, lr_0 = 2.5833e-04
Loss = 6.1097e-02, PNorm = 67.9554, GNorm = 0.5273, lr_0 = 2.5815e-04
Loss = 5.6091e-02, PNorm = 67.9584, GNorm = 0.7060, lr_0 = 2.5797e-04
Loss = 5.8051e-02, PNorm = 67.9625, GNorm = 0.4529, lr_0 = 2.5780e-04
Loss = 4.7543e-02, PNorm = 67.9673, GNorm = 0.5878, lr_0 = 2.5762e-04
Loss = 5.6147e-02, PNorm = 67.9720, GNorm = 0.4640, lr_0 = 2.5745e-04
Loss = 6.2811e-02, PNorm = 67.9764, GNorm = 0.7174, lr_0 = 2.5727e-04
Loss = 6.0863e-02, PNorm = 67.9835, GNorm = 0.5405, lr_0 = 2.5709e-04
Loss = 5.1396e-02, PNorm = 67.9906, GNorm = 0.4459, lr_0 = 2.5692e-04
Loss = 6.6754e-02, PNorm = 67.9957, GNorm = 0.4910, lr_0 = 2.5674e-04
Loss = 6.2313e-02, PNorm = 67.9999, GNorm = 0.7950, lr_0 = 2.5656e-04
Loss = 5.0595e-02, PNorm = 68.0059, GNorm = 0.4895, lr_0 = 2.5639e-04
Loss = 5.9480e-02, PNorm = 68.0102, GNorm = 0.8316, lr_0 = 2.5621e-04
Loss = 5.6962e-02, PNorm = 68.0145, GNorm = 0.7405, lr_0 = 2.5604e-04
Loss = 5.8504e-02, PNorm = 68.0191, GNorm = 0.6221, lr_0 = 2.5586e-04
Loss = 5.2678e-02, PNorm = 68.0221, GNorm = 0.4466, lr_0 = 2.5569e-04
Loss = 5.0069e-02, PNorm = 68.0268, GNorm = 0.6954, lr_0 = 2.5551e-04
Loss = 5.6811e-02, PNorm = 68.0307, GNorm = 0.9824, lr_0 = 2.5534e-04
Loss = 5.5836e-02, PNorm = 68.0323, GNorm = 0.7674, lr_0 = 2.5516e-04
Loss = 5.1035e-02, PNorm = 68.0362, GNorm = 0.6715, lr_0 = 2.5499e-04
Loss = 5.1790e-02, PNorm = 68.0398, GNorm = 0.3923, lr_0 = 2.5481e-04
Loss = 4.5712e-02, PNorm = 68.0447, GNorm = 0.4413, lr_0 = 2.5464e-04
Loss = 6.3483e-02, PNorm = 68.0496, GNorm = 0.5433, lr_0 = 2.5446e-04
Loss = 6.0636e-02, PNorm = 68.0555, GNorm = 0.4956, lr_0 = 2.5429e-04
Loss = 6.1091e-02, PNorm = 68.0602, GNorm = 0.4711, lr_0 = 2.5411e-04
Loss = 5.7786e-02, PNorm = 68.0668, GNorm = 0.6354, lr_0 = 2.5394e-04
Loss = 5.5417e-02, PNorm = 68.0737, GNorm = 0.5151, lr_0 = 2.5377e-04
Loss = 6.0230e-02, PNorm = 68.0800, GNorm = 0.5888, lr_0 = 2.5359e-04
Loss = 6.1060e-02, PNorm = 68.0845, GNorm = 0.5779, lr_0 = 2.5342e-04
Loss = 6.3183e-02, PNorm = 68.0893, GNorm = 0.7327, lr_0 = 2.5325e-04
Loss = 6.5620e-02, PNorm = 68.0938, GNorm = 0.5494, lr_0 = 2.5307e-04
Loss = 5.6245e-02, PNorm = 68.0988, GNorm = 0.5528, lr_0 = 2.5290e-04
Loss = 5.5001e-02, PNorm = 68.1037, GNorm = 0.9365, lr_0 = 2.5273e-04
Loss = 5.2874e-02, PNorm = 68.1093, GNorm = 0.5706, lr_0 = 2.5255e-04
Loss = 5.6913e-02, PNorm = 68.1134, GNorm = 0.7743, lr_0 = 2.5238e-04
Loss = 4.9533e-02, PNorm = 68.1171, GNorm = 0.7099, lr_0 = 2.5221e-04
Loss = 6.1605e-02, PNorm = 68.1209, GNorm = 0.7704, lr_0 = 2.5203e-04
Loss = 5.2839e-02, PNorm = 68.1246, GNorm = 0.5284, lr_0 = 2.5186e-04
Loss = 5.2299e-02, PNorm = 68.1282, GNorm = 0.6572, lr_0 = 2.5169e-04
Loss = 5.7614e-02, PNorm = 68.1319, GNorm = 0.7269, lr_0 = 2.5152e-04
Loss = 5.0235e-02, PNorm = 68.1382, GNorm = 0.4516, lr_0 = 2.5134e-04
Loss = 5.6802e-02, PNorm = 68.1430, GNorm = 0.5053, lr_0 = 2.5117e-04
Loss = 5.5188e-02, PNorm = 68.1478, GNorm = 0.4994, lr_0 = 2.5100e-04
Loss = 5.5509e-02, PNorm = 68.1510, GNorm = 0.7582, lr_0 = 2.5083e-04
Loss = 6.7650e-02, PNorm = 68.1579, GNorm = 0.6284, lr_0 = 2.5066e-04
Loss = 6.1027e-02, PNorm = 68.1636, GNorm = 0.9358, lr_0 = 2.5048e-04
Loss = 5.6011e-02, PNorm = 68.1696, GNorm = 0.5899, lr_0 = 2.5031e-04
Loss = 4.6742e-02, PNorm = 68.1755, GNorm = 0.6275, lr_0 = 2.5014e-04
Loss = 5.5117e-02, PNorm = 68.1772, GNorm = 0.5082, lr_0 = 2.4997e-04
Loss = 5.5731e-02, PNorm = 68.1812, GNorm = 0.6566, lr_0 = 2.4980e-04
Loss = 5.9623e-02, PNorm = 68.1861, GNorm = 0.9634, lr_0 = 2.4963e-04
Loss = 4.7683e-02, PNorm = 68.1896, GNorm = 0.4008, lr_0 = 2.4946e-04
Loss = 6.0513e-02, PNorm = 68.1928, GNorm = 0.5923, lr_0 = 2.4929e-04
Loss = 5.9597e-02, PNorm = 68.1999, GNorm = 0.5807, lr_0 = 2.4911e-04
Loss = 5.2809e-02, PNorm = 68.2079, GNorm = 0.6331, lr_0 = 2.4894e-04
Loss = 5.1696e-02, PNorm = 68.2125, GNorm = 0.4551, lr_0 = 2.4877e-04
Loss = 5.7929e-02, PNorm = 68.2157, GNorm = 0.8767, lr_0 = 2.4860e-04
Loss = 5.1163e-02, PNorm = 68.2210, GNorm = 0.5167, lr_0 = 2.4843e-04
Loss = 6.4272e-02, PNorm = 68.2244, GNorm = 0.5155, lr_0 = 2.4826e-04
Loss = 5.9080e-02, PNorm = 68.2279, GNorm = 0.9463, lr_0 = 2.4809e-04
Loss = 6.1637e-02, PNorm = 68.2317, GNorm = 0.6993, lr_0 = 2.4792e-04
Loss = 5.8396e-02, PNorm = 68.2342, GNorm = 0.5136, lr_0 = 2.4775e-04
Loss = 5.3321e-02, PNorm = 68.2376, GNorm = 0.5696, lr_0 = 2.4758e-04
Loss = 5.6247e-02, PNorm = 68.2428, GNorm = 0.4243, lr_0 = 2.4741e-04
Loss = 5.2147e-02, PNorm = 68.2462, GNorm = 0.5211, lr_0 = 2.4724e-04
Loss = 5.1145e-02, PNorm = 68.2484, GNorm = 0.5990, lr_0 = 2.4707e-04
Validation mae = 0.389190
Epoch 19
Loss = 4.5052e-02, PNorm = 68.2529, GNorm = 0.5953, lr_0 = 2.4690e-04
Loss = 5.5444e-02, PNorm = 68.2579, GNorm = 0.5991, lr_0 = 2.4674e-04
Loss = 4.3870e-02, PNorm = 68.2626, GNorm = 0.6330, lr_0 = 2.4657e-04
Loss = 4.3587e-02, PNorm = 68.2684, GNorm = 0.4800, lr_0 = 2.4640e-04
Loss = 4.5616e-02, PNorm = 68.2722, GNorm = 0.4331, lr_0 = 2.4623e-04
Loss = 4.3142e-02, PNorm = 68.2770, GNorm = 0.4226, lr_0 = 2.4606e-04
Loss = 5.5646e-02, PNorm = 68.2819, GNorm = 0.5966, lr_0 = 2.4589e-04
Loss = 4.8727e-02, PNorm = 68.2880, GNorm = 0.4701, lr_0 = 2.4572e-04
Loss = 4.8886e-02, PNorm = 68.2944, GNorm = 0.7218, lr_0 = 2.4556e-04
Loss = 4.8891e-02, PNorm = 68.2970, GNorm = 0.4952, lr_0 = 2.4539e-04
Loss = 4.8613e-02, PNorm = 68.2994, GNorm = 0.3846, lr_0 = 2.4522e-04
Loss = 5.0216e-02, PNorm = 68.3048, GNorm = 0.6563, lr_0 = 2.4505e-04
Loss = 4.6825e-02, PNorm = 68.3095, GNorm = 0.4798, lr_0 = 2.4488e-04
Loss = 5.3800e-02, PNorm = 68.3129, GNorm = 0.4936, lr_0 = 2.4472e-04
Loss = 4.9310e-02, PNorm = 68.3182, GNorm = 1.0709, lr_0 = 2.4455e-04
Loss = 4.9054e-02, PNorm = 68.3253, GNorm = 0.8320, lr_0 = 2.4438e-04
Loss = 5.2874e-02, PNorm = 68.3325, GNorm = 0.4446, lr_0 = 2.4421e-04
Loss = 4.5758e-02, PNorm = 68.3371, GNorm = 0.5136, lr_0 = 2.4405e-04
Loss = 5.5707e-02, PNorm = 68.3409, GNorm = 0.6809, lr_0 = 2.4388e-04
Loss = 4.4619e-02, PNorm = 68.3426, GNorm = 0.4506, lr_0 = 2.4371e-04
Loss = 6.9507e-02, PNorm = 68.3473, GNorm = 0.6244, lr_0 = 2.4354e-04
Loss = 4.5697e-02, PNorm = 68.3527, GNorm = 0.4902, lr_0 = 2.4338e-04
Loss = 5.4850e-02, PNorm = 68.3571, GNorm = 0.5617, lr_0 = 2.4321e-04
Loss = 4.8055e-02, PNorm = 68.3612, GNorm = 0.4762, lr_0 = 2.4304e-04
Loss = 5.2198e-02, PNorm = 68.3648, GNorm = 0.6313, lr_0 = 2.4288e-04
Loss = 4.9709e-02, PNorm = 68.3693, GNorm = 0.5619, lr_0 = 2.4271e-04
Loss = 5.5368e-02, PNorm = 68.3750, GNorm = 1.0959, lr_0 = 2.4254e-04
Loss = 4.9694e-02, PNorm = 68.3802, GNorm = 0.6944, lr_0 = 2.4238e-04
Loss = 4.8449e-02, PNorm = 68.3857, GNorm = 0.5345, lr_0 = 2.4221e-04
Loss = 4.1777e-02, PNorm = 68.3909, GNorm = 0.4550, lr_0 = 2.4205e-04
Loss = 4.4509e-02, PNorm = 68.3980, GNorm = 0.6485, lr_0 = 2.4188e-04
Loss = 5.1151e-02, PNorm = 68.4031, GNorm = 0.7685, lr_0 = 2.4171e-04
Loss = 4.9046e-02, PNorm = 68.4068, GNorm = 0.4242, lr_0 = 2.4155e-04
Loss = 5.4671e-02, PNorm = 68.4102, GNorm = 0.4878, lr_0 = 2.4138e-04
Loss = 5.0211e-02, PNorm = 68.4143, GNorm = 0.4625, lr_0 = 2.4122e-04
Loss = 4.2280e-02, PNorm = 68.4194, GNorm = 0.5181, lr_0 = 2.4105e-04
Loss = 4.3001e-02, PNorm = 68.4220, GNorm = 0.8485, lr_0 = 2.4089e-04
Loss = 4.5459e-02, PNorm = 68.4249, GNorm = 0.4233, lr_0 = 2.4072e-04
Loss = 5.6438e-02, PNorm = 68.4281, GNorm = 0.6411, lr_0 = 2.4056e-04
Loss = 4.9598e-02, PNorm = 68.4310, GNorm = 0.6220, lr_0 = 2.4039e-04
Loss = 4.3391e-02, PNorm = 68.4345, GNorm = 0.5316, lr_0 = 2.4023e-04
Loss = 5.3601e-02, PNorm = 68.4388, GNorm = 0.5014, lr_0 = 2.4006e-04
Loss = 4.7142e-02, PNorm = 68.4432, GNorm = 0.5431, lr_0 = 2.3990e-04
Loss = 4.9439e-02, PNorm = 68.4458, GNorm = 0.6522, lr_0 = 2.3974e-04
Loss = 5.1969e-02, PNorm = 68.4515, GNorm = 0.5374, lr_0 = 2.3957e-04
Loss = 5.3324e-02, PNorm = 68.4564, GNorm = 0.6868, lr_0 = 2.3941e-04
Loss = 5.4655e-02, PNorm = 68.4624, GNorm = 0.6337, lr_0 = 2.3924e-04
Loss = 5.3256e-02, PNorm = 68.4689, GNorm = 0.5451, lr_0 = 2.3908e-04
Loss = 5.0381e-02, PNorm = 68.4728, GNorm = 0.6669, lr_0 = 2.3892e-04
Loss = 4.8913e-02, PNorm = 68.4753, GNorm = 0.5302, lr_0 = 2.3875e-04
Loss = 5.1650e-02, PNorm = 68.4790, GNorm = 0.4626, lr_0 = 2.3859e-04
Loss = 5.0848e-02, PNorm = 68.4829, GNorm = 0.5444, lr_0 = 2.3842e-04
Loss = 4.7140e-02, PNorm = 68.4837, GNorm = 0.6339, lr_0 = 2.3826e-04
Loss = 4.4710e-02, PNorm = 68.4875, GNorm = 0.5465, lr_0 = 2.3810e-04
Loss = 5.0266e-02, PNorm = 68.4948, GNorm = 0.5354, lr_0 = 2.3794e-04
Loss = 5.2696e-02, PNorm = 68.5011, GNorm = 0.6728, lr_0 = 2.3777e-04
Loss = 5.1346e-02, PNorm = 68.5044, GNorm = 0.4823, lr_0 = 2.3761e-04
Loss = 5.9053e-02, PNorm = 68.5092, GNorm = 0.4580, lr_0 = 2.3745e-04
Loss = 6.1085e-02, PNorm = 68.5142, GNorm = 0.7227, lr_0 = 2.3728e-04
Loss = 4.7938e-02, PNorm = 68.5183, GNorm = 0.4976, lr_0 = 2.3712e-04
Loss = 5.9049e-02, PNorm = 68.5231, GNorm = 0.8337, lr_0 = 2.3696e-04
Loss = 4.5173e-02, PNorm = 68.5269, GNorm = 0.4751, lr_0 = 2.3680e-04
Loss = 5.1963e-02, PNorm = 68.5312, GNorm = 0.8271, lr_0 = 2.3663e-04
Loss = 5.7408e-02, PNorm = 68.5346, GNorm = 0.8173, lr_0 = 2.3647e-04
Loss = 5.0269e-02, PNorm = 68.5378, GNorm = 0.6438, lr_0 = 2.3631e-04
Loss = 4.6302e-02, PNorm = 68.5419, GNorm = 0.5488, lr_0 = 2.3615e-04
Loss = 4.9759e-02, PNorm = 68.5480, GNorm = 0.4323, lr_0 = 2.3599e-04
Loss = 4.8101e-02, PNorm = 68.5516, GNorm = 0.6843, lr_0 = 2.3582e-04
Loss = 5.5327e-02, PNorm = 68.5568, GNorm = 0.6113, lr_0 = 2.3566e-04
Loss = 5.1721e-02, PNorm = 68.5613, GNorm = 0.6207, lr_0 = 2.3550e-04
Loss = 5.2718e-02, PNorm = 68.5657, GNorm = 0.5491, lr_0 = 2.3534e-04
Loss = 5.2300e-02, PNorm = 68.5690, GNorm = 0.9125, lr_0 = 2.3518e-04
Loss = 4.6275e-02, PNorm = 68.5733, GNorm = 0.3595, lr_0 = 2.3502e-04
Loss = 5.2988e-02, PNorm = 68.5775, GNorm = 0.9162, lr_0 = 2.3486e-04
Loss = 5.3965e-02, PNorm = 68.5810, GNorm = 0.4672, lr_0 = 2.3470e-04
Loss = 4.5590e-02, PNorm = 68.5836, GNorm = 0.4969, lr_0 = 2.3454e-04
Loss = 5.4157e-02, PNorm = 68.5884, GNorm = 0.7172, lr_0 = 2.3437e-04
Loss = 4.9370e-02, PNorm = 68.5914, GNorm = 0.7922, lr_0 = 2.3421e-04
Loss = 4.7521e-02, PNorm = 68.5954, GNorm = 0.5138, lr_0 = 2.3405e-04
Loss = 4.8534e-02, PNorm = 68.6002, GNorm = 0.4709, lr_0 = 2.3389e-04
Loss = 4.9968e-02, PNorm = 68.6034, GNorm = 0.5376, lr_0 = 2.3373e-04
Loss = 5.5603e-02, PNorm = 68.6048, GNorm = 0.5897, lr_0 = 2.3357e-04
Loss = 5.1791e-02, PNorm = 68.6083, GNorm = 0.6247, lr_0 = 2.3341e-04
Loss = 4.8472e-02, PNorm = 68.6111, GNorm = 0.8191, lr_0 = 2.3325e-04
Loss = 4.3392e-02, PNorm = 68.6142, GNorm = 0.3895, lr_0 = 2.3309e-04
Loss = 5.0535e-02, PNorm = 68.6176, GNorm = 0.5451, lr_0 = 2.3293e-04
Loss = 5.6507e-02, PNorm = 68.6216, GNorm = 0.4360, lr_0 = 2.3277e-04
Loss = 5.6551e-02, PNorm = 68.6262, GNorm = 0.5299, lr_0 = 2.3261e-04
Loss = 5.5967e-02, PNorm = 68.6305, GNorm = 0.5175, lr_0 = 2.3246e-04
Loss = 5.4131e-02, PNorm = 68.6352, GNorm = 0.4021, lr_0 = 2.3230e-04
Loss = 4.8060e-02, PNorm = 68.6396, GNorm = 0.4155, lr_0 = 2.3214e-04
Loss = 6.1233e-02, PNorm = 68.6439, GNorm = 0.6804, lr_0 = 2.3198e-04
Loss = 5.7716e-02, PNorm = 68.6479, GNorm = 0.5560, lr_0 = 2.3182e-04
Loss = 5.4721e-02, PNorm = 68.6531, GNorm = 0.4646, lr_0 = 2.3166e-04
Loss = 5.9691e-02, PNorm = 68.6572, GNorm = 0.5607, lr_0 = 2.3150e-04
Loss = 5.2906e-02, PNorm = 68.6607, GNorm = 0.7165, lr_0 = 2.3134e-04
Loss = 5.2203e-02, PNorm = 68.6622, GNorm = 0.7902, lr_0 = 2.3118e-04
Loss = 5.6795e-02, PNorm = 68.6669, GNorm = 0.9114, lr_0 = 2.3103e-04
Loss = 5.3228e-02, PNorm = 68.6719, GNorm = 0.8730, lr_0 = 2.3087e-04
Loss = 5.5551e-02, PNorm = 68.6755, GNorm = 0.7226, lr_0 = 2.3071e-04
Loss = 5.2218e-02, PNorm = 68.6795, GNorm = 0.4796, lr_0 = 2.3055e-04
Loss = 5.8177e-02, PNorm = 68.6840, GNorm = 0.6801, lr_0 = 2.3039e-04
Loss = 5.4041e-02, PNorm = 68.6888, GNorm = 0.5943, lr_0 = 2.3024e-04
Loss = 4.4940e-02, PNorm = 68.6941, GNorm = 0.6676, lr_0 = 2.3008e-04
Loss = 5.0868e-02, PNorm = 68.6981, GNorm = 0.7124, lr_0 = 2.2992e-04
Loss = 5.9910e-02, PNorm = 68.7023, GNorm = 0.6531, lr_0 = 2.2976e-04
Loss = 6.1709e-02, PNorm = 68.7052, GNorm = 0.6491, lr_0 = 2.2961e-04
Loss = 4.4355e-02, PNorm = 68.7089, GNorm = 0.5525, lr_0 = 2.2945e-04
Loss = 5.1655e-02, PNorm = 68.7127, GNorm = 0.5482, lr_0 = 2.2929e-04
Loss = 4.6414e-02, PNorm = 68.7156, GNorm = 0.5388, lr_0 = 2.2913e-04
Loss = 5.8624e-02, PNorm = 68.7170, GNorm = 0.8710, lr_0 = 2.2898e-04
Loss = 5.3485e-02, PNorm = 68.7175, GNorm = 0.5074, lr_0 = 2.2882e-04
Loss = 4.6855e-02, PNorm = 68.7193, GNorm = 0.5940, lr_0 = 2.2866e-04
Loss = 4.6199e-02, PNorm = 68.7233, GNorm = 0.4636, lr_0 = 2.2851e-04
Loss = 5.4602e-02, PNorm = 68.7254, GNorm = 0.4944, lr_0 = 2.2835e-04
Loss = 4.6126e-02, PNorm = 68.7278, GNorm = 0.4865, lr_0 = 2.2819e-04
Loss = 5.1237e-02, PNorm = 68.7310, GNorm = 0.7167, lr_0 = 2.2804e-04
Loss = 4.2342e-02, PNorm = 68.7350, GNorm = 0.5617, lr_0 = 2.2788e-04
Loss = 5.0044e-02, PNorm = 68.7374, GNorm = 0.5181, lr_0 = 2.2773e-04
Loss = 4.9469e-02, PNorm = 68.7397, GNorm = 0.4640, lr_0 = 2.2757e-04
Validation mae = 0.384587
Epoch 20
Loss = 4.1025e-02, PNorm = 68.7430, GNorm = 0.6550, lr_0 = 2.2741e-04
Loss = 4.7249e-02, PNorm = 68.7470, GNorm = 0.4018, lr_0 = 2.2726e-04
Loss = 4.1628e-02, PNorm = 68.7520, GNorm = 0.6926, lr_0 = 2.2710e-04
Loss = 3.9716e-02, PNorm = 68.7558, GNorm = 0.5423, lr_0 = 2.2695e-04
Loss = 4.5446e-02, PNorm = 68.7596, GNorm = 0.4949, lr_0 = 2.2679e-04
Loss = 4.9028e-02, PNorm = 68.7648, GNorm = 0.5524, lr_0 = 2.2664e-04
Loss = 4.4127e-02, PNorm = 68.7686, GNorm = 0.7709, lr_0 = 2.2648e-04
Loss = 5.6149e-02, PNorm = 68.7736, GNorm = 0.6114, lr_0 = 2.2632e-04
Loss = 4.2293e-02, PNorm = 68.7792, GNorm = 0.5757, lr_0 = 2.2617e-04
Loss = 4.4154e-02, PNorm = 68.7850, GNorm = 0.7364, lr_0 = 2.2601e-04
Loss = 4.4614e-02, PNorm = 68.7902, GNorm = 0.7216, lr_0 = 2.2586e-04
Loss = 4.2841e-02, PNorm = 68.7953, GNorm = 0.5558, lr_0 = 2.2571e-04
Loss = 4.7013e-02, PNorm = 68.8000, GNorm = 0.4653, lr_0 = 2.2555e-04
Loss = 5.2697e-02, PNorm = 68.8036, GNorm = 0.7282, lr_0 = 2.2540e-04
Loss = 4.6425e-02, PNorm = 68.8069, GNorm = 0.6055, lr_0 = 2.2524e-04
Loss = 4.6871e-02, PNorm = 68.8111, GNorm = 0.5291, lr_0 = 2.2509e-04
Loss = 4.1859e-02, PNorm = 68.8161, GNorm = 0.4956, lr_0 = 2.2493e-04
Loss = 4.2950e-02, PNorm = 68.8202, GNorm = 0.4438, lr_0 = 2.2478e-04
Loss = 4.0775e-02, PNorm = 68.8235, GNorm = 0.5478, lr_0 = 2.2463e-04
Loss = 3.8053e-02, PNorm = 68.8256, GNorm = 0.3968, lr_0 = 2.2447e-04
Loss = 4.6889e-02, PNorm = 68.8277, GNorm = 0.4761, lr_0 = 2.2432e-04
Loss = 4.4878e-02, PNorm = 68.8311, GNorm = 0.5909, lr_0 = 2.2416e-04
Loss = 4.8787e-02, PNorm = 68.8357, GNorm = 0.4457, lr_0 = 2.2401e-04
Loss = 4.4267e-02, PNorm = 68.8405, GNorm = 0.6082, lr_0 = 2.2386e-04
Loss = 5.2824e-02, PNorm = 68.8467, GNorm = 0.6277, lr_0 = 2.2370e-04
Loss = 4.4065e-02, PNorm = 68.8522, GNorm = 0.7570, lr_0 = 2.2355e-04
Loss = 4.4141e-02, PNorm = 68.8543, GNorm = 0.3968, lr_0 = 2.2340e-04
Loss = 3.9436e-02, PNorm = 68.8567, GNorm = 0.4511, lr_0 = 2.2324e-04
Loss = 4.6548e-02, PNorm = 68.8602, GNorm = 0.4721, lr_0 = 2.2309e-04
Loss = 4.2785e-02, PNorm = 68.8649, GNorm = 0.4410, lr_0 = 2.2294e-04
Loss = 4.7235e-02, PNorm = 68.8691, GNorm = 0.6086, lr_0 = 2.2279e-04
Loss = 4.4102e-02, PNorm = 68.8728, GNorm = 0.5146, lr_0 = 2.2263e-04
Loss = 4.4179e-02, PNorm = 68.8776, GNorm = 0.5179, lr_0 = 2.2248e-04
Loss = 4.1201e-02, PNorm = 68.8801, GNorm = 0.5590, lr_0 = 2.2233e-04
Loss = 4.5590e-02, PNorm = 68.8822, GNorm = 0.4276, lr_0 = 2.2218e-04
Loss = 4.2505e-02, PNorm = 68.8854, GNorm = 0.6595, lr_0 = 2.2202e-04
Loss = 3.9561e-02, PNorm = 68.8885, GNorm = 0.7673, lr_0 = 2.2187e-04
Loss = 4.9999e-02, PNorm = 68.8929, GNorm = 0.6442, lr_0 = 2.2172e-04
Loss = 4.3987e-02, PNorm = 68.8951, GNorm = 0.5375, lr_0 = 2.2157e-04
Loss = 4.6819e-02, PNorm = 68.8982, GNorm = 0.7099, lr_0 = 2.2142e-04
Loss = 5.6091e-02, PNorm = 68.9013, GNorm = 0.6972, lr_0 = 2.2126e-04
Loss = 3.9056e-02, PNorm = 68.9045, GNorm = 0.4118, lr_0 = 2.2111e-04
Loss = 4.4239e-02, PNorm = 68.9076, GNorm = 0.4621, lr_0 = 2.2096e-04
Loss = 3.8326e-02, PNorm = 68.9101, GNorm = 0.4081, lr_0 = 2.2081e-04
Loss = 4.5479e-02, PNorm = 68.9125, GNorm = 0.5044, lr_0 = 2.2066e-04
Loss = 4.7079e-02, PNorm = 68.9151, GNorm = 0.6824, lr_0 = 2.2051e-04
Loss = 5.1188e-02, PNorm = 68.9188, GNorm = 0.6227, lr_0 = 2.2036e-04
Loss = 4.5978e-02, PNorm = 68.9227, GNorm = 0.5281, lr_0 = 2.2021e-04
Loss = 5.1570e-02, PNorm = 68.9251, GNorm = 0.4634, lr_0 = 2.2005e-04
Loss = 5.3349e-02, PNorm = 68.9283, GNorm = 0.4150, lr_0 = 2.1990e-04
Loss = 4.9272e-02, PNorm = 68.9343, GNorm = 0.3780, lr_0 = 2.1975e-04
Loss = 5.0453e-02, PNorm = 68.9389, GNorm = 0.4833, lr_0 = 2.1960e-04
Loss = 4.8579e-02, PNorm = 68.9441, GNorm = 0.5828, lr_0 = 2.1945e-04
Loss = 4.7539e-02, PNorm = 68.9485, GNorm = 0.4708, lr_0 = 2.1930e-04
Loss = 5.1071e-02, PNorm = 68.9519, GNorm = 0.6865, lr_0 = 2.1915e-04
Loss = 5.0927e-02, PNorm = 68.9549, GNorm = 0.7726, lr_0 = 2.1900e-04
Loss = 4.8112e-02, PNorm = 68.9593, GNorm = 0.5133, lr_0 = 2.1885e-04
Loss = 5.7055e-02, PNorm = 68.9653, GNorm = 0.9708, lr_0 = 2.1870e-04
Loss = 4.7311e-02, PNorm = 68.9696, GNorm = 0.4733, lr_0 = 2.1855e-04
Loss = 4.3409e-02, PNorm = 68.9730, GNorm = 0.6828, lr_0 = 2.1840e-04
Loss = 4.2123e-02, PNorm = 68.9753, GNorm = 0.3844, lr_0 = 2.1825e-04
Loss = 4.5962e-02, PNorm = 68.9787, GNorm = 0.5952, lr_0 = 2.1810e-04
Loss = 4.9087e-02, PNorm = 68.9843, GNorm = 0.5013, lr_0 = 2.1795e-04
Loss = 5.7055e-02, PNorm = 68.9895, GNorm = 0.6241, lr_0 = 2.1780e-04
Loss = 4.9632e-02, PNorm = 68.9951, GNorm = 0.7878, lr_0 = 2.1765e-04
Loss = 4.5044e-02, PNorm = 68.9995, GNorm = 0.4808, lr_0 = 2.1751e-04
Loss = 4.7499e-02, PNorm = 69.0030, GNorm = 0.7262, lr_0 = 2.1736e-04
Loss = 4.4318e-02, PNorm = 69.0057, GNorm = 0.5406, lr_0 = 2.1721e-04
Loss = 5.1822e-02, PNorm = 69.0090, GNorm = 0.5127, lr_0 = 2.1706e-04
Loss = 5.2484e-02, PNorm = 69.0139, GNorm = 0.5457, lr_0 = 2.1691e-04
Loss = 5.6706e-02, PNorm = 69.0180, GNorm = 0.9727, lr_0 = 2.1676e-04
Loss = 4.4609e-02, PNorm = 69.0225, GNorm = 0.9545, lr_0 = 2.1661e-04
Loss = 5.0298e-02, PNorm = 69.0259, GNorm = 0.5885, lr_0 = 2.1646e-04
Loss = 3.6937e-02, PNorm = 69.0281, GNorm = 0.4410, lr_0 = 2.1632e-04
Loss = 4.8167e-02, PNorm = 69.0304, GNorm = 0.5351, lr_0 = 2.1617e-04
Loss = 4.9992e-02, PNorm = 69.0338, GNorm = 0.4494, lr_0 = 2.1602e-04
Loss = 4.5904e-02, PNorm = 69.0389, GNorm = 0.6937, lr_0 = 2.1587e-04
Loss = 5.0017e-02, PNorm = 69.0442, GNorm = 0.5752, lr_0 = 2.1572e-04
Loss = 5.1727e-02, PNorm = 69.0477, GNorm = 0.5097, lr_0 = 2.1558e-04
Loss = 4.5470e-02, PNorm = 69.0515, GNorm = 0.5632, lr_0 = 2.1543e-04
Loss = 4.3324e-02, PNorm = 69.0541, GNorm = 0.5693, lr_0 = 2.1528e-04
Loss = 5.2623e-02, PNorm = 69.0563, GNorm = 0.6236, lr_0 = 2.1513e-04
Loss = 4.6602e-02, PNorm = 69.0593, GNorm = 0.6718, lr_0 = 2.1499e-04
Loss = 4.8188e-02, PNorm = 69.0619, GNorm = 0.5461, lr_0 = 2.1484e-04
Loss = 5.0158e-02, PNorm = 69.0647, GNorm = 0.4585, lr_0 = 2.1469e-04
Loss = 4.3666e-02, PNorm = 69.0677, GNorm = 0.8241, lr_0 = 2.1454e-04
Loss = 4.8239e-02, PNorm = 69.0724, GNorm = 0.5247, lr_0 = 2.1440e-04
Loss = 4.5453e-02, PNorm = 69.0770, GNorm = 0.5362, lr_0 = 2.1425e-04
Loss = 5.5348e-02, PNorm = 69.0807, GNorm = 0.6846, lr_0 = 2.1410e-04
Loss = 4.9419e-02, PNorm = 69.0843, GNorm = 0.5583, lr_0 = 2.1396e-04
Loss = 4.9436e-02, PNorm = 69.0887, GNorm = 0.7738, lr_0 = 2.1381e-04
Loss = 4.9768e-02, PNorm = 69.0918, GNorm = 0.5501, lr_0 = 2.1366e-04
Loss = 5.6416e-02, PNorm = 69.0949, GNorm = 0.5472, lr_0 = 2.1352e-04
Loss = 5.2225e-02, PNorm = 69.0965, GNorm = 0.3384, lr_0 = 2.1337e-04
Loss = 5.0210e-02, PNorm = 69.0994, GNorm = 0.6438, lr_0 = 2.1323e-04
Loss = 4.9271e-02, PNorm = 69.1024, GNorm = 0.4128, lr_0 = 2.1308e-04
Loss = 5.3572e-02, PNorm = 69.1077, GNorm = 0.5421, lr_0 = 2.1293e-04
Loss = 5.0565e-02, PNorm = 69.1101, GNorm = 0.5801, lr_0 = 2.1279e-04
Loss = 4.7778e-02, PNorm = 69.1120, GNorm = 0.5700, lr_0 = 2.1264e-04
Loss = 4.7377e-02, PNorm = 69.1151, GNorm = 0.4155, lr_0 = 2.1250e-04
Loss = 4.7051e-02, PNorm = 69.1178, GNorm = 0.6574, lr_0 = 2.1235e-04
Loss = 6.0409e-02, PNorm = 69.1216, GNorm = 0.5808, lr_0 = 2.1221e-04
Loss = 5.7943e-02, PNorm = 69.1263, GNorm = 0.5159, lr_0 = 2.1206e-04
Loss = 5.2163e-02, PNorm = 69.1311, GNorm = 0.4802, lr_0 = 2.1191e-04
Loss = 4.8041e-02, PNorm = 69.1364, GNorm = 0.5254, lr_0 = 2.1177e-04
Loss = 5.8324e-02, PNorm = 69.1401, GNorm = 0.5618, lr_0 = 2.1162e-04
Loss = 4.7855e-02, PNorm = 69.1438, GNorm = 0.6202, lr_0 = 2.1148e-04
Loss = 4.9362e-02, PNorm = 69.1480, GNorm = 0.4157, lr_0 = 2.1133e-04
Loss = 4.2174e-02, PNorm = 69.1517, GNorm = 0.4712, lr_0 = 2.1119e-04
Loss = 4.6542e-02, PNorm = 69.1539, GNorm = 0.4699, lr_0 = 2.1104e-04
Loss = 5.5275e-02, PNorm = 69.1570, GNorm = 0.5727, lr_0 = 2.1090e-04
Loss = 4.8719e-02, PNorm = 69.1597, GNorm = 0.9139, lr_0 = 2.1076e-04
Loss = 4.7729e-02, PNorm = 69.1627, GNorm = 0.4378, lr_0 = 2.1061e-04
Loss = 4.3167e-02, PNorm = 69.1681, GNorm = 0.5305, lr_0 = 2.1047e-04
Loss = 5.6441e-02, PNorm = 69.1732, GNorm = 0.5336, lr_0 = 2.1032e-04
Loss = 5.5263e-02, PNorm = 69.1760, GNorm = 0.5861, lr_0 = 2.1018e-04
Loss = 4.5664e-02, PNorm = 69.1792, GNorm = 0.7658, lr_0 = 2.1003e-04
Loss = 5.3951e-02, PNorm = 69.1828, GNorm = 0.7937, lr_0 = 2.0989e-04
Loss = 4.8921e-02, PNorm = 69.1844, GNorm = 0.5150, lr_0 = 2.0975e-04
Loss = 4.5158e-02, PNorm = 69.1864, GNorm = 0.6308, lr_0 = 2.0960e-04
Validation mae = 0.385286
Epoch 21
Loss = 5.6597e-02, PNorm = 69.1896, GNorm = 1.0749, lr_0 = 2.0946e-04
Loss = 4.5991e-02, PNorm = 69.1934, GNorm = 0.6448, lr_0 = 2.0932e-04
Loss = 4.5110e-02, PNorm = 69.1987, GNorm = 0.5238, lr_0 = 2.0917e-04
Loss = 4.5161e-02, PNorm = 69.2020, GNorm = 0.4854, lr_0 = 2.0903e-04
Loss = 4.1993e-02, PNorm = 69.2043, GNorm = 0.7476, lr_0 = 2.0889e-04
Loss = 4.1554e-02, PNorm = 69.2080, GNorm = 0.4467, lr_0 = 2.0874e-04
Loss = 4.0895e-02, PNorm = 69.2105, GNorm = 0.4582, lr_0 = 2.0860e-04
Loss = 4.0525e-02, PNorm = 69.2127, GNorm = 0.4162, lr_0 = 2.0846e-04
Loss = 3.9968e-02, PNorm = 69.2147, GNorm = 0.5010, lr_0 = 2.0831e-04
Loss = 4.5549e-02, PNorm = 69.2177, GNorm = 0.5509, lr_0 = 2.0817e-04
Loss = 4.5913e-02, PNorm = 69.2214, GNorm = 0.5961, lr_0 = 2.0803e-04
Loss = 4.8533e-02, PNorm = 69.2238, GNorm = 0.4860, lr_0 = 2.0789e-04
Loss = 4.7059e-02, PNorm = 69.2276, GNorm = 0.5486, lr_0 = 2.0774e-04
Loss = 4.1476e-02, PNorm = 69.2330, GNorm = 0.5150, lr_0 = 2.0760e-04
Loss = 4.3162e-02, PNorm = 69.2375, GNorm = 0.3880, lr_0 = 2.0746e-04
Loss = 4.3698e-02, PNorm = 69.2411, GNorm = 0.4381, lr_0 = 2.0732e-04
Loss = 5.1056e-02, PNorm = 69.2428, GNorm = 0.4972, lr_0 = 2.0718e-04
Loss = 4.2832e-02, PNorm = 69.2468, GNorm = 0.3868, lr_0 = 2.0703e-04
Loss = 4.3161e-02, PNorm = 69.2515, GNorm = 0.6085, lr_0 = 2.0689e-04
Loss = 5.0829e-02, PNorm = 69.2569, GNorm = 0.6746, lr_0 = 2.0675e-04
Loss = 3.9008e-02, PNorm = 69.2614, GNorm = 0.5443, lr_0 = 2.0661e-04
Loss = 4.0909e-02, PNorm = 69.2645, GNorm = 0.5260, lr_0 = 2.0647e-04
Loss = 4.1470e-02, PNorm = 69.2684, GNorm = 0.4866, lr_0 = 2.0633e-04
Loss = 3.9463e-02, PNorm = 69.2735, GNorm = 0.4306, lr_0 = 2.0618e-04
Loss = 4.1971e-02, PNorm = 69.2773, GNorm = 0.3797, lr_0 = 2.0604e-04
Loss = 4.2134e-02, PNorm = 69.2815, GNorm = 0.5361, lr_0 = 2.0590e-04
Loss = 4.4163e-02, PNorm = 69.2849, GNorm = 0.7735, lr_0 = 2.0576e-04
Loss = 5.1554e-02, PNorm = 69.2878, GNorm = 0.5063, lr_0 = 2.0562e-04
Loss = 4.5071e-02, PNorm = 69.2905, GNorm = 0.7949, lr_0 = 2.0548e-04
Loss = 4.4745e-02, PNorm = 69.2931, GNorm = 0.5360, lr_0 = 2.0534e-04
Loss = 4.5449e-02, PNorm = 69.2966, GNorm = 0.9408, lr_0 = 2.0520e-04
Loss = 4.2363e-02, PNorm = 69.3000, GNorm = 0.8461, lr_0 = 2.0506e-04
Loss = 5.1252e-02, PNorm = 69.3044, GNorm = 0.4683, lr_0 = 2.0492e-04
Loss = 3.8061e-02, PNorm = 69.3087, GNorm = 0.5248, lr_0 = 2.0478e-04
Loss = 4.4094e-02, PNorm = 69.3114, GNorm = 0.5135, lr_0 = 2.0464e-04
Loss = 3.8486e-02, PNorm = 69.3135, GNorm = 0.3566, lr_0 = 2.0450e-04
Loss = 4.6028e-02, PNorm = 69.3166, GNorm = 0.4955, lr_0 = 2.0436e-04
Loss = 4.6368e-02, PNorm = 69.3189, GNorm = 0.6182, lr_0 = 2.0422e-04
Loss = 5.1793e-02, PNorm = 69.3224, GNorm = 0.4775, lr_0 = 2.0408e-04
Loss = 4.1234e-02, PNorm = 69.3270, GNorm = 0.5844, lr_0 = 2.0394e-04
Loss = 4.3672e-02, PNorm = 69.3305, GNorm = 0.6605, lr_0 = 2.0380e-04
Loss = 3.9655e-02, PNorm = 69.3347, GNorm = 0.5858, lr_0 = 2.0366e-04
Loss = 4.5499e-02, PNorm = 69.3380, GNorm = 0.5750, lr_0 = 2.0352e-04
Loss = 4.4952e-02, PNorm = 69.3412, GNorm = 0.6517, lr_0 = 2.0338e-04
Loss = 4.6927e-02, PNorm = 69.3450, GNorm = 0.6220, lr_0 = 2.0324e-04
Loss = 4.4524e-02, PNorm = 69.3487, GNorm = 0.4645, lr_0 = 2.0310e-04
Loss = 3.7258e-02, PNorm = 69.3513, GNorm = 0.9330, lr_0 = 2.0296e-04
Loss = 4.9380e-02, PNorm = 69.3553, GNorm = 0.4396, lr_0 = 2.0282e-04
Loss = 4.2833e-02, PNorm = 69.3592, GNorm = 0.4271, lr_0 = 2.0268e-04
Loss = 4.0492e-02, PNorm = 69.3626, GNorm = 0.5688, lr_0 = 2.0254e-04
Loss = 4.5859e-02, PNorm = 69.3670, GNorm = 0.5104, lr_0 = 2.0240e-04
Loss = 4.5149e-02, PNorm = 69.3702, GNorm = 0.4127, lr_0 = 2.0227e-04
Loss = 4.2647e-02, PNorm = 69.3735, GNorm = 0.6805, lr_0 = 2.0213e-04
Loss = 4.8879e-02, PNorm = 69.3768, GNorm = 0.5038, lr_0 = 2.0199e-04
Loss = 4.6961e-02, PNorm = 69.3808, GNorm = 0.5422, lr_0 = 2.0185e-04
Loss = 4.3429e-02, PNorm = 69.3844, GNorm = 0.5344, lr_0 = 2.0171e-04
Loss = 4.4322e-02, PNorm = 69.3884, GNorm = 0.5004, lr_0 = 2.0157e-04
Loss = 3.7136e-02, PNorm = 69.3905, GNorm = 0.4746, lr_0 = 2.0144e-04
Loss = 3.6132e-02, PNorm = 69.3922, GNorm = 0.4292, lr_0 = 2.0130e-04
Loss = 4.9425e-02, PNorm = 69.3957, GNorm = 0.6022, lr_0 = 2.0116e-04
Loss = 4.1865e-02, PNorm = 69.3992, GNorm = 0.5315, lr_0 = 2.0102e-04
Loss = 4.4758e-02, PNorm = 69.4008, GNorm = 0.4696, lr_0 = 2.0088e-04
Loss = 4.9555e-02, PNorm = 69.4022, GNorm = 0.4546, lr_0 = 2.0075e-04
Loss = 3.7940e-02, PNorm = 69.4055, GNorm = 0.4917, lr_0 = 2.0061e-04
Loss = 4.1012e-02, PNorm = 69.4099, GNorm = 0.4966, lr_0 = 2.0047e-04
Loss = 4.9282e-02, PNorm = 69.4138, GNorm = 0.5407, lr_0 = 2.0033e-04
Loss = 5.3160e-02, PNorm = 69.4191, GNorm = 0.5296, lr_0 = 2.0020e-04
Loss = 4.0357e-02, PNorm = 69.4231, GNorm = 0.4331, lr_0 = 2.0006e-04
Loss = 5.2639e-02, PNorm = 69.4263, GNorm = 0.6167, lr_0 = 1.9992e-04
Loss = 4.4039e-02, PNorm = 69.4300, GNorm = 0.4616, lr_0 = 1.9979e-04
Loss = 4.3609e-02, PNorm = 69.4335, GNorm = 0.5153, lr_0 = 1.9965e-04
Loss = 5.2521e-02, PNorm = 69.4367, GNorm = 0.6337, lr_0 = 1.9951e-04
Loss = 4.0780e-02, PNorm = 69.4413, GNorm = 0.4861, lr_0 = 1.9938e-04
Loss = 4.8071e-02, PNorm = 69.4433, GNorm = 0.7499, lr_0 = 1.9924e-04
Loss = 4.7470e-02, PNorm = 69.4472, GNorm = 0.4256, lr_0 = 1.9910e-04
Loss = 5.5698e-02, PNorm = 69.4520, GNorm = 0.7243, lr_0 = 1.9897e-04
Loss = 4.6582e-02, PNorm = 69.4568, GNorm = 0.5832, lr_0 = 1.9883e-04
Loss = 4.5797e-02, PNorm = 69.4592, GNorm = 0.4935, lr_0 = 1.9869e-04
Loss = 4.2986e-02, PNorm = 69.4612, GNorm = 0.7654, lr_0 = 1.9856e-04
Loss = 4.9364e-02, PNorm = 69.4642, GNorm = 0.5916, lr_0 = 1.9842e-04
Loss = 4.2813e-02, PNorm = 69.4684, GNorm = 0.5041, lr_0 = 1.9829e-04
Loss = 4.7030e-02, PNorm = 69.4717, GNorm = 0.5775, lr_0 = 1.9815e-04
Loss = 4.3567e-02, PNorm = 69.4736, GNorm = 0.5865, lr_0 = 1.9801e-04
Loss = 4.5164e-02, PNorm = 69.4767, GNorm = 0.5896, lr_0 = 1.9788e-04
Loss = 3.8891e-02, PNorm = 69.4799, GNorm = 0.6492, lr_0 = 1.9774e-04
Loss = 4.9999e-02, PNorm = 69.4819, GNorm = 0.9033, lr_0 = 1.9761e-04
Loss = 4.1118e-02, PNorm = 69.4835, GNorm = 0.7358, lr_0 = 1.9747e-04
Loss = 4.1644e-02, PNorm = 69.4867, GNorm = 0.6113, lr_0 = 1.9734e-04
Loss = 4.9531e-02, PNorm = 69.4894, GNorm = 0.5586, lr_0 = 1.9720e-04
Loss = 4.0779e-02, PNorm = 69.4914, GNorm = 0.5526, lr_0 = 1.9707e-04
Loss = 4.5223e-02, PNorm = 69.4947, GNorm = 0.6750, lr_0 = 1.9693e-04
Loss = 3.9906e-02, PNorm = 69.4984, GNorm = 0.3372, lr_0 = 1.9680e-04
Loss = 4.5924e-02, PNorm = 69.5023, GNorm = 0.6715, lr_0 = 1.9666e-04
Loss = 4.0296e-02, PNorm = 69.5036, GNorm = 0.4166, lr_0 = 1.9653e-04
Loss = 4.2936e-02, PNorm = 69.5057, GNorm = 0.7047, lr_0 = 1.9639e-04
Loss = 4.6529e-02, PNorm = 69.5089, GNorm = 0.5312, lr_0 = 1.9626e-04
Loss = 4.3756e-02, PNorm = 69.5109, GNorm = 0.5051, lr_0 = 1.9612e-04
Loss = 4.6857e-02, PNorm = 69.5126, GNorm = 0.5228, lr_0 = 1.9599e-04
Loss = 4.5225e-02, PNorm = 69.5149, GNorm = 0.6469, lr_0 = 1.9585e-04
Loss = 4.5872e-02, PNorm = 69.5178, GNorm = 0.5134, lr_0 = 1.9572e-04
Loss = 4.1191e-02, PNorm = 69.5204, GNorm = 0.6820, lr_0 = 1.9559e-04
Loss = 4.3276e-02, PNorm = 69.5232, GNorm = 0.4370, lr_0 = 1.9545e-04
Loss = 4.7432e-02, PNorm = 69.5258, GNorm = 0.4405, lr_0 = 1.9532e-04
Loss = 4.8675e-02, PNorm = 69.5294, GNorm = 0.4137, lr_0 = 1.9518e-04
Loss = 4.6330e-02, PNorm = 69.5318, GNorm = 0.8601, lr_0 = 1.9505e-04
Loss = 4.8079e-02, PNorm = 69.5337, GNorm = 0.7055, lr_0 = 1.9492e-04
Loss = 4.8730e-02, PNorm = 69.5358, GNorm = 0.7808, lr_0 = 1.9478e-04
Loss = 4.6691e-02, PNorm = 69.5399, GNorm = 0.7056, lr_0 = 1.9465e-04
Loss = 4.6535e-02, PNorm = 69.5449, GNorm = 0.5468, lr_0 = 1.9452e-04
Loss = 5.3374e-02, PNorm = 69.5471, GNorm = 0.5708, lr_0 = 1.9438e-04
Loss = 4.6019e-02, PNorm = 69.5489, GNorm = 0.5012, lr_0 = 1.9425e-04
Loss = 4.5451e-02, PNorm = 69.5517, GNorm = 0.5231, lr_0 = 1.9412e-04
Loss = 4.4674e-02, PNorm = 69.5529, GNorm = 0.4266, lr_0 = 1.9398e-04
Loss = 5.1900e-02, PNorm = 69.5545, GNorm = 0.4826, lr_0 = 1.9385e-04
Loss = 4.1175e-02, PNorm = 69.5572, GNorm = 0.6942, lr_0 = 1.9372e-04
Loss = 4.1217e-02, PNorm = 69.5598, GNorm = 0.3852, lr_0 = 1.9359e-04
Loss = 4.9843e-02, PNorm = 69.5634, GNorm = 0.4895, lr_0 = 1.9345e-04
Loss = 4.2890e-02, PNorm = 69.5669, GNorm = 0.7430, lr_0 = 1.9332e-04
Loss = 5.0921e-02, PNorm = 69.5702, GNorm = 0.5110, lr_0 = 1.9319e-04
Loss = 5.8024e-02, PNorm = 69.5725, GNorm = 0.9552, lr_0 = 1.9306e-04
Validation mae = 0.389088
Epoch 22
Loss = 3.9889e-02, PNorm = 69.5760, GNorm = 0.5755, lr_0 = 1.9292e-04
Loss = 3.6718e-02, PNorm = 69.5791, GNorm = 0.6968, lr_0 = 1.9279e-04
Loss = 3.8371e-02, PNorm = 69.5829, GNorm = 0.4586, lr_0 = 1.9266e-04
Loss = 3.2641e-02, PNorm = 69.5863, GNorm = 0.4139, lr_0 = 1.9253e-04
Loss = 3.7291e-02, PNorm = 69.5906, GNorm = 0.4824, lr_0 = 1.9240e-04
Loss = 4.0311e-02, PNorm = 69.5939, GNorm = 0.5614, lr_0 = 1.9226e-04
Loss = 3.7124e-02, PNorm = 69.5982, GNorm = 0.6204, lr_0 = 1.9213e-04
Loss = 3.7014e-02, PNorm = 69.6018, GNorm = 0.8224, lr_0 = 1.9200e-04
Loss = 4.1774e-02, PNorm = 69.6046, GNorm = 0.5217, lr_0 = 1.9187e-04
Loss = 4.1643e-02, PNorm = 69.6073, GNorm = 0.4691, lr_0 = 1.9174e-04
Loss = 4.3301e-02, PNorm = 69.6093, GNorm = 0.6353, lr_0 = 1.9161e-04
Loss = 4.0730e-02, PNorm = 69.6121, GNorm = 0.6007, lr_0 = 1.9148e-04
Loss = 3.6457e-02, PNorm = 69.6152, GNorm = 0.7455, lr_0 = 1.9134e-04
Loss = 3.7669e-02, PNorm = 69.6178, GNorm = 0.4614, lr_0 = 1.9121e-04
Loss = 4.7078e-02, PNorm = 69.6215, GNorm = 0.8798, lr_0 = 1.9108e-04
Loss = 3.6757e-02, PNorm = 69.6257, GNorm = 0.5404, lr_0 = 1.9095e-04
Loss = 3.5385e-02, PNorm = 69.6285, GNorm = 0.4939, lr_0 = 1.9082e-04
Loss = 4.2829e-02, PNorm = 69.6314, GNorm = 0.4320, lr_0 = 1.9069e-04
Loss = 4.5977e-02, PNorm = 69.6336, GNorm = 0.6412, lr_0 = 1.9056e-04
Loss = 4.6150e-02, PNorm = 69.6358, GNorm = 1.0563, lr_0 = 1.9043e-04
Loss = 3.9713e-02, PNorm = 69.6391, GNorm = 0.5057, lr_0 = 1.9030e-04
Loss = 3.8112e-02, PNorm = 69.6418, GNorm = 0.4381, lr_0 = 1.9017e-04
Loss = 4.0125e-02, PNorm = 69.6445, GNorm = 0.4790, lr_0 = 1.9004e-04
Loss = 4.8889e-02, PNorm = 69.6459, GNorm = 0.5472, lr_0 = 1.8991e-04
Loss = 4.2532e-02, PNorm = 69.6483, GNorm = 0.5576, lr_0 = 1.8978e-04
Loss = 4.8417e-02, PNorm = 69.6520, GNorm = 0.4619, lr_0 = 1.8965e-04
Loss = 4.4726e-02, PNorm = 69.6563, GNorm = 0.4799, lr_0 = 1.8952e-04
Loss = 5.3954e-02, PNorm = 69.6605, GNorm = 0.9754, lr_0 = 1.8939e-04
Loss = 4.7332e-02, PNorm = 69.6638, GNorm = 0.5339, lr_0 = 1.8926e-04
Loss = 4.1432e-02, PNorm = 69.6658, GNorm = 0.7484, lr_0 = 1.8913e-04
Loss = 4.6771e-02, PNorm = 69.6683, GNorm = 0.6248, lr_0 = 1.8900e-04
Loss = 4.3711e-02, PNorm = 69.6721, GNorm = 0.4780, lr_0 = 1.8887e-04
Loss = 4.2528e-02, PNorm = 69.6747, GNorm = 0.4147, lr_0 = 1.8874e-04
Loss = 3.6544e-02, PNorm = 69.6762, GNorm = 0.5482, lr_0 = 1.8861e-04
Loss = 3.7544e-02, PNorm = 69.6774, GNorm = 0.4788, lr_0 = 1.8848e-04
Loss = 3.8193e-02, PNorm = 69.6800, GNorm = 0.4221, lr_0 = 1.8835e-04
Loss = 3.7020e-02, PNorm = 69.6854, GNorm = 0.4921, lr_0 = 1.8822e-04
Loss = 3.6173e-02, PNorm = 69.6893, GNorm = 0.4128, lr_0 = 1.8809e-04
Loss = 3.8570e-02, PNorm = 69.6936, GNorm = 0.3455, lr_0 = 1.8797e-04
Loss = 4.1772e-02, PNorm = 69.6972, GNorm = 0.7998, lr_0 = 1.8784e-04
Loss = 4.3065e-02, PNorm = 69.6990, GNorm = 0.5590, lr_0 = 1.8771e-04
Loss = 4.1566e-02, PNorm = 69.7014, GNorm = 0.5626, lr_0 = 1.8758e-04
Loss = 4.0979e-02, PNorm = 69.7032, GNorm = 0.5334, lr_0 = 1.8745e-04
Loss = 4.0924e-02, PNorm = 69.7053, GNorm = 0.6557, lr_0 = 1.8732e-04
Loss = 3.9527e-02, PNorm = 69.7081, GNorm = 0.6392, lr_0 = 1.8719e-04
Loss = 4.9185e-02, PNorm = 69.7096, GNorm = 0.4812, lr_0 = 1.8707e-04
Loss = 4.6903e-02, PNorm = 69.7133, GNorm = 0.6044, lr_0 = 1.8694e-04
Loss = 5.4170e-02, PNorm = 69.7178, GNorm = 0.4731, lr_0 = 1.8681e-04
Loss = 4.8686e-02, PNorm = 69.7228, GNorm = 0.4651, lr_0 = 1.8668e-04
Loss = 5.0843e-02, PNorm = 69.7260, GNorm = 0.4782, lr_0 = 1.8655e-04
Loss = 4.6453e-02, PNorm = 69.7299, GNorm = 0.7529, lr_0 = 1.8643e-04
Loss = 3.8783e-02, PNorm = 69.7337, GNorm = 0.5694, lr_0 = 1.8630e-04
Loss = 5.0296e-02, PNorm = 69.7362, GNorm = 0.3782, lr_0 = 1.8617e-04
Loss = 3.9537e-02, PNorm = 69.7403, GNorm = 0.5659, lr_0 = 1.8604e-04
Loss = 4.4568e-02, PNorm = 69.7453, GNorm = 0.4529, lr_0 = 1.8592e-04
Loss = 4.8124e-02, PNorm = 69.7499, GNorm = 0.6226, lr_0 = 1.8579e-04
Loss = 3.6539e-02, PNorm = 69.7534, GNorm = 0.4539, lr_0 = 1.8566e-04
Loss = 4.0076e-02, PNorm = 69.7560, GNorm = 0.3582, lr_0 = 1.8553e-04
Loss = 3.7405e-02, PNorm = 69.7594, GNorm = 0.5278, lr_0 = 1.8541e-04
Loss = 3.7147e-02, PNorm = 69.7605, GNorm = 0.7420, lr_0 = 1.8528e-04
Loss = 4.1436e-02, PNorm = 69.7629, GNorm = 0.5981, lr_0 = 1.8515e-04
Loss = 4.4190e-02, PNorm = 69.7648, GNorm = 0.7189, lr_0 = 1.8503e-04
Loss = 4.6013e-02, PNorm = 69.7682, GNorm = 0.4834, lr_0 = 1.8490e-04
Loss = 4.3845e-02, PNorm = 69.7714, GNorm = 0.5823, lr_0 = 1.8477e-04
Loss = 4.4699e-02, PNorm = 69.7743, GNorm = 0.3910, lr_0 = 1.8465e-04
Loss = 4.5801e-02, PNorm = 69.7778, GNorm = 0.4517, lr_0 = 1.8452e-04
Loss = 4.7529e-02, PNorm = 69.7784, GNorm = 0.6744, lr_0 = 1.8439e-04
Loss = 3.6302e-02, PNorm = 69.7789, GNorm = 0.4341, lr_0 = 1.8427e-04
Loss = 3.9300e-02, PNorm = 69.7809, GNorm = 0.5330, lr_0 = 1.8414e-04
Loss = 3.9165e-02, PNorm = 69.7843, GNorm = 0.3910, lr_0 = 1.8401e-04
Loss = 4.1001e-02, PNorm = 69.7863, GNorm = 0.5683, lr_0 = 1.8389e-04
Loss = 4.6158e-02, PNorm = 69.7880, GNorm = 0.6227, lr_0 = 1.8376e-04
Loss = 4.0461e-02, PNorm = 69.7902, GNorm = 0.5315, lr_0 = 1.8364e-04
Loss = 4.8081e-02, PNorm = 69.7936, GNorm = 0.5508, lr_0 = 1.8351e-04
Loss = 3.8887e-02, PNorm = 69.7973, GNorm = 0.5295, lr_0 = 1.8338e-04
Loss = 3.7799e-02, PNorm = 69.7992, GNorm = 0.5234, lr_0 = 1.8326e-04
Loss = 4.3697e-02, PNorm = 69.8010, GNorm = 0.5906, lr_0 = 1.8313e-04
Loss = 4.1429e-02, PNorm = 69.8025, GNorm = 0.5179, lr_0 = 1.8301e-04
Loss = 4.0776e-02, PNorm = 69.8034, GNorm = 0.5634, lr_0 = 1.8288e-04
Loss = 4.2675e-02, PNorm = 69.8065, GNorm = 0.4531, lr_0 = 1.8276e-04
Loss = 3.8364e-02, PNorm = 69.8093, GNorm = 0.4332, lr_0 = 1.8263e-04
Loss = 3.8512e-02, PNorm = 69.8126, GNorm = 0.6395, lr_0 = 1.8251e-04
Loss = 4.5276e-02, PNorm = 69.8175, GNorm = 0.4699, lr_0 = 1.8238e-04
Loss = 4.1294e-02, PNorm = 69.8216, GNorm = 0.4489, lr_0 = 1.8226e-04
Loss = 3.9562e-02, PNorm = 69.8235, GNorm = 0.5896, lr_0 = 1.8213e-04
Loss = 4.1108e-02, PNorm = 69.8260, GNorm = 0.4780, lr_0 = 1.8201e-04
Loss = 4.6202e-02, PNorm = 69.8300, GNorm = 0.5846, lr_0 = 1.8188e-04
Loss = 4.1234e-02, PNorm = 69.8326, GNorm = 0.5864, lr_0 = 1.8176e-04
Loss = 4.6513e-02, PNorm = 69.8356, GNorm = 0.4558, lr_0 = 1.8163e-04
Loss = 5.0045e-02, PNorm = 69.8390, GNorm = 0.6595, lr_0 = 1.8151e-04
Loss = 3.9976e-02, PNorm = 69.8423, GNorm = 0.4492, lr_0 = 1.8138e-04
Loss = 4.6616e-02, PNorm = 69.8438, GNorm = 0.3972, lr_0 = 1.8126e-04
Loss = 3.9951e-02, PNorm = 69.8451, GNorm = 0.4936, lr_0 = 1.8114e-04
Loss = 4.7487e-02, PNorm = 69.8470, GNorm = 0.4771, lr_0 = 1.8101e-04
Loss = 4.6529e-02, PNorm = 69.8494, GNorm = 0.5360, lr_0 = 1.8089e-04
Loss = 5.1558e-02, PNorm = 69.8522, GNorm = 0.8140, lr_0 = 1.8076e-04
Loss = 4.0653e-02, PNorm = 69.8551, GNorm = 0.4894, lr_0 = 1.8064e-04
Loss = 4.0740e-02, PNorm = 69.8582, GNorm = 0.8067, lr_0 = 1.8052e-04
Loss = 4.0600e-02, PNorm = 69.8612, GNorm = 0.5242, lr_0 = 1.8039e-04
Loss = 4.1480e-02, PNorm = 69.8644, GNorm = 0.5794, lr_0 = 1.8027e-04
Loss = 4.5225e-02, PNorm = 69.8672, GNorm = 0.4714, lr_0 = 1.8015e-04
Loss = 5.7731e-02, PNorm = 69.8695, GNorm = 0.5980, lr_0 = 1.8002e-04
Loss = 4.2010e-02, PNorm = 69.8715, GNorm = 0.4117, lr_0 = 1.7990e-04
Loss = 4.4477e-02, PNorm = 69.8751, GNorm = 0.5185, lr_0 = 1.7978e-04
Loss = 4.4056e-02, PNorm = 69.8787, GNorm = 0.5797, lr_0 = 1.7965e-04
Loss = 3.6029e-02, PNorm = 69.8825, GNorm = 0.3416, lr_0 = 1.7953e-04
Loss = 4.4983e-02, PNorm = 69.8859, GNorm = 0.6590, lr_0 = 1.7941e-04
Loss = 4.9285e-02, PNorm = 69.8886, GNorm = 0.7161, lr_0 = 1.7928e-04
Loss = 4.8770e-02, PNorm = 69.8905, GNorm = 0.4414, lr_0 = 1.7916e-04
Loss = 3.7039e-02, PNorm = 69.8934, GNorm = 0.4691, lr_0 = 1.7904e-04
Loss = 3.9518e-02, PNorm = 69.8959, GNorm = 0.5397, lr_0 = 1.7892e-04
Loss = 4.4315e-02, PNorm = 69.8984, GNorm = 0.3967, lr_0 = 1.7879e-04
Loss = 4.0266e-02, PNorm = 69.9005, GNorm = 0.5385, lr_0 = 1.7867e-04
Loss = 3.7126e-02, PNorm = 69.9027, GNorm = 0.5956, lr_0 = 1.7855e-04
Loss = 3.9570e-02, PNorm = 69.9048, GNorm = 0.4978, lr_0 = 1.7843e-04
Loss = 4.1266e-02, PNorm = 69.9071, GNorm = 0.3892, lr_0 = 1.7830e-04
Loss = 4.1973e-02, PNorm = 69.9091, GNorm = 0.5963, lr_0 = 1.7818e-04
Loss = 5.1307e-02, PNorm = 69.9124, GNorm = 0.5597, lr_0 = 1.7806e-04
Loss = 4.2994e-02, PNorm = 69.9151, GNorm = 0.6385, lr_0 = 1.7794e-04
Loss = 5.0152e-02, PNorm = 69.9186, GNorm = 0.4445, lr_0 = 1.7782e-04
Validation mae = 0.384569
Epoch 23
Loss = 3.8032e-02, PNorm = 69.9217, GNorm = 0.6199, lr_0 = 1.7769e-04
Loss = 3.8124e-02, PNorm = 69.9237, GNorm = 0.5455, lr_0 = 1.7757e-04
Loss = 3.1775e-02, PNorm = 69.9272, GNorm = 0.5111, lr_0 = 1.7745e-04
Loss = 4.1769e-02, PNorm = 69.9304, GNorm = 0.4839, lr_0 = 1.7733e-04
Loss = 3.4787e-02, PNorm = 69.9338, GNorm = 0.3694, lr_0 = 1.7721e-04
Loss = 3.5245e-02, PNorm = 69.9365, GNorm = 0.5281, lr_0 = 1.7709e-04
Loss = 3.5965e-02, PNorm = 69.9381, GNorm = 0.6102, lr_0 = 1.7696e-04
Loss = 3.9523e-02, PNorm = 69.9400, GNorm = 0.5836, lr_0 = 1.7684e-04
Loss = 3.3618e-02, PNorm = 69.9426, GNorm = 0.5041, lr_0 = 1.7672e-04
Loss = 4.0265e-02, PNorm = 69.9439, GNorm = 0.4191, lr_0 = 1.7660e-04
Loss = 4.2903e-02, PNorm = 69.9444, GNorm = 0.4071, lr_0 = 1.7648e-04
Loss = 4.0666e-02, PNorm = 69.9477, GNorm = 0.4242, lr_0 = 1.7636e-04
Loss = 3.7264e-02, PNorm = 69.9521, GNorm = 0.4489, lr_0 = 1.7624e-04
Loss = 4.7230e-02, PNorm = 69.9559, GNorm = 0.4901, lr_0 = 1.7612e-04
Loss = 3.8896e-02, PNorm = 69.9598, GNorm = 0.9383, lr_0 = 1.7600e-04
Loss = 3.5209e-02, PNorm = 69.9629, GNorm = 0.6035, lr_0 = 1.7588e-04
Loss = 3.8479e-02, PNorm = 69.9660, GNorm = 0.4371, lr_0 = 1.7576e-04
Loss = 3.5943e-02, PNorm = 69.9676, GNorm = 0.4452, lr_0 = 1.7564e-04
Loss = 4.1113e-02, PNorm = 69.9723, GNorm = 0.7563, lr_0 = 1.7552e-04
Loss = 3.7290e-02, PNorm = 69.9764, GNorm = 0.7355, lr_0 = 1.7540e-04
Loss = 3.9520e-02, PNorm = 69.9806, GNorm = 0.3975, lr_0 = 1.7528e-04
Loss = 3.8524e-02, PNorm = 69.9824, GNorm = 0.7329, lr_0 = 1.7516e-04
Loss = 3.6068e-02, PNorm = 69.9855, GNorm = 0.3705, lr_0 = 1.7504e-04
Loss = 3.6310e-02, PNorm = 69.9878, GNorm = 0.5460, lr_0 = 1.7492e-04
Loss = 4.3201e-02, PNorm = 69.9912, GNorm = 0.5555, lr_0 = 1.7480e-04
Loss = 3.7046e-02, PNorm = 69.9951, GNorm = 0.5187, lr_0 = 1.7468e-04
Loss = 4.3459e-02, PNorm = 69.9978, GNorm = 0.6914, lr_0 = 1.7456e-04
Loss = 5.0805e-02, PNorm = 70.0001, GNorm = 0.8689, lr_0 = 1.7444e-04
Loss = 4.1423e-02, PNorm = 70.0036, GNorm = 0.4973, lr_0 = 1.7432e-04
Loss = 3.4947e-02, PNorm = 70.0053, GNorm = 0.5044, lr_0 = 1.7420e-04
Loss = 3.8794e-02, PNorm = 70.0065, GNorm = 0.6448, lr_0 = 1.7408e-04
Loss = 4.1032e-02, PNorm = 70.0083, GNorm = 0.3321, lr_0 = 1.7396e-04
Loss = 4.0489e-02, PNorm = 70.0114, GNorm = 0.4160, lr_0 = 1.7384e-04
Loss = 5.2933e-02, PNorm = 70.0138, GNorm = 0.4089, lr_0 = 1.7372e-04
Loss = 4.2514e-02, PNorm = 70.0163, GNorm = 0.7931, lr_0 = 1.7360e-04
Loss = 3.8277e-02, PNorm = 70.0177, GNorm = 0.7231, lr_0 = 1.7348e-04
Loss = 3.1837e-02, PNorm = 70.0199, GNorm = 0.4663, lr_0 = 1.7336e-04
Loss = 4.4704e-02, PNorm = 70.0229, GNorm = 0.5937, lr_0 = 1.7325e-04
Loss = 3.7243e-02, PNorm = 70.0261, GNorm = 0.5622, lr_0 = 1.7313e-04
Loss = 4.2004e-02, PNorm = 70.0281, GNorm = 0.4009, lr_0 = 1.7301e-04
Loss = 4.1123e-02, PNorm = 70.0297, GNorm = 0.5391, lr_0 = 1.7289e-04
Loss = 3.3151e-02, PNorm = 70.0311, GNorm = 0.5126, lr_0 = 1.7277e-04
Loss = 4.2008e-02, PNorm = 70.0339, GNorm = 0.7041, lr_0 = 1.7265e-04
Loss = 3.5735e-02, PNorm = 70.0367, GNorm = 0.5513, lr_0 = 1.7253e-04
Loss = 3.8262e-02, PNorm = 70.0395, GNorm = 0.3755, lr_0 = 1.7242e-04
Loss = 3.8598e-02, PNorm = 70.0421, GNorm = 0.4740, lr_0 = 1.7230e-04
Loss = 4.3352e-02, PNorm = 70.0460, GNorm = 0.4505, lr_0 = 1.7218e-04
Loss = 4.2102e-02, PNorm = 70.0483, GNorm = 0.5380, lr_0 = 1.7206e-04
Loss = 4.3579e-02, PNorm = 70.0493, GNorm = 0.7427, lr_0 = 1.7194e-04
Loss = 3.4225e-02, PNorm = 70.0519, GNorm = 0.7154, lr_0 = 1.7183e-04
Loss = 4.4302e-02, PNorm = 70.0543, GNorm = 0.4371, lr_0 = 1.7171e-04
Loss = 3.6946e-02, PNorm = 70.0572, GNorm = 0.5048, lr_0 = 1.7159e-04
Loss = 3.8558e-02, PNorm = 70.0613, GNorm = 0.4798, lr_0 = 1.7147e-04
Loss = 3.8218e-02, PNorm = 70.0653, GNorm = 0.6056, lr_0 = 1.7136e-04
Loss = 3.9349e-02, PNorm = 70.0694, GNorm = 0.5152, lr_0 = 1.7124e-04
Loss = 3.9065e-02, PNorm = 70.0716, GNorm = 0.4971, lr_0 = 1.7112e-04
Loss = 3.7298e-02, PNorm = 70.0736, GNorm = 0.4963, lr_0 = 1.7100e-04
Loss = 4.1374e-02, PNorm = 70.0767, GNorm = 0.4910, lr_0 = 1.7089e-04
Loss = 4.1989e-02, PNorm = 70.0799, GNorm = 0.5662, lr_0 = 1.7077e-04
Loss = 4.3311e-02, PNorm = 70.0841, GNorm = 0.4153, lr_0 = 1.7065e-04
Loss = 3.7068e-02, PNorm = 70.0881, GNorm = 0.5338, lr_0 = 1.7054e-04
Loss = 3.9641e-02, PNorm = 70.0898, GNorm = 0.5633, lr_0 = 1.7042e-04
Loss = 4.4602e-02, PNorm = 70.0888, GNorm = 0.6621, lr_0 = 1.7030e-04
Loss = 3.3291e-02, PNorm = 70.0889, GNorm = 0.5514, lr_0 = 1.7019e-04
Loss = 4.0934e-02, PNorm = 70.0909, GNorm = 0.5339, lr_0 = 1.7007e-04
Loss = 3.9573e-02, PNorm = 70.0927, GNorm = 0.5723, lr_0 = 1.6995e-04
Loss = 3.7391e-02, PNorm = 70.0957, GNorm = 0.4639, lr_0 = 1.6984e-04
Loss = 3.9825e-02, PNorm = 70.0996, GNorm = 0.5380, lr_0 = 1.6972e-04
Loss = 3.7734e-02, PNorm = 70.1011, GNorm = 0.6490, lr_0 = 1.6960e-04
Loss = 4.3835e-02, PNorm = 70.1035, GNorm = 0.6516, lr_0 = 1.6949e-04
Loss = 3.7935e-02, PNorm = 70.1055, GNorm = 0.6792, lr_0 = 1.6937e-04
Loss = 3.4460e-02, PNorm = 70.1070, GNorm = 0.5072, lr_0 = 1.6926e-04
Loss = 4.1433e-02, PNorm = 70.1100, GNorm = 0.5273, lr_0 = 1.6914e-04
Loss = 3.5679e-02, PNorm = 70.1138, GNorm = 0.5319, lr_0 = 1.6902e-04
Loss = 3.8686e-02, PNorm = 70.1159, GNorm = 0.4052, lr_0 = 1.6891e-04
Loss = 4.1292e-02, PNorm = 70.1190, GNorm = 0.5329, lr_0 = 1.6879e-04
Loss = 4.3882e-02, PNorm = 70.1224, GNorm = 0.4850, lr_0 = 1.6868e-04
Loss = 4.7916e-02, PNorm = 70.1247, GNorm = 0.6764, lr_0 = 1.6856e-04
Loss = 4.1678e-02, PNorm = 70.1286, GNorm = 0.6080, lr_0 = 1.6845e-04
Loss = 4.1344e-02, PNorm = 70.1333, GNorm = 0.5280, lr_0 = 1.6833e-04
Loss = 3.8969e-02, PNorm = 70.1369, GNorm = 0.7819, lr_0 = 1.6821e-04
Loss = 3.9361e-02, PNorm = 70.1405, GNorm = 0.5255, lr_0 = 1.6810e-04
Loss = 3.6122e-02, PNorm = 70.1430, GNorm = 0.4756, lr_0 = 1.6798e-04
Loss = 4.0433e-02, PNorm = 70.1446, GNorm = 0.4132, lr_0 = 1.6787e-04
Loss = 4.6647e-02, PNorm = 70.1466, GNorm = 0.4337, lr_0 = 1.6775e-04
Loss = 4.2478e-02, PNorm = 70.1488, GNorm = 0.8880, lr_0 = 1.6764e-04
Loss = 4.3108e-02, PNorm = 70.1513, GNorm = 0.5892, lr_0 = 1.6752e-04
Loss = 3.9243e-02, PNorm = 70.1540, GNorm = 0.5389, lr_0 = 1.6741e-04
Loss = 4.2235e-02, PNorm = 70.1561, GNorm = 0.4529, lr_0 = 1.6729e-04
Loss = 3.5894e-02, PNorm = 70.1580, GNorm = 0.5047, lr_0 = 1.6718e-04
Loss = 3.8468e-02, PNorm = 70.1608, GNorm = 0.4375, lr_0 = 1.6707e-04
Loss = 4.1319e-02, PNorm = 70.1634, GNorm = 0.6478, lr_0 = 1.6695e-04
Loss = 3.8706e-02, PNorm = 70.1653, GNorm = 0.7528, lr_0 = 1.6684e-04
Loss = 4.3372e-02, PNorm = 70.1688, GNorm = 0.4414, lr_0 = 1.6672e-04
Loss = 4.1232e-02, PNorm = 70.1711, GNorm = 0.4228, lr_0 = 1.6661e-04
Loss = 4.1553e-02, PNorm = 70.1733, GNorm = 0.4447, lr_0 = 1.6649e-04
Loss = 4.0175e-02, PNorm = 70.1755, GNorm = 0.6117, lr_0 = 1.6638e-04
Loss = 3.8180e-02, PNorm = 70.1779, GNorm = 0.4110, lr_0 = 1.6627e-04
Loss = 4.2043e-02, PNorm = 70.1792, GNorm = 0.7075, lr_0 = 1.6615e-04
Loss = 3.6593e-02, PNorm = 70.1816, GNorm = 0.7849, lr_0 = 1.6604e-04
Loss = 3.5644e-02, PNorm = 70.1847, GNorm = 0.4301, lr_0 = 1.6592e-04
Loss = 4.1317e-02, PNorm = 70.1875, GNorm = 0.5483, lr_0 = 1.6581e-04
Loss = 4.0563e-02, PNorm = 70.1900, GNorm = 0.4059, lr_0 = 1.6570e-04
Loss = 3.9247e-02, PNorm = 70.1914, GNorm = 0.6476, lr_0 = 1.6558e-04
Loss = 4.0446e-02, PNorm = 70.1922, GNorm = 0.4747, lr_0 = 1.6547e-04
Loss = 4.3628e-02, PNorm = 70.1935, GNorm = 0.4841, lr_0 = 1.6536e-04
Loss = 4.1522e-02, PNorm = 70.1955, GNorm = 0.4431, lr_0 = 1.6524e-04
Loss = 3.6834e-02, PNorm = 70.1974, GNorm = 0.5435, lr_0 = 1.6513e-04
Loss = 4.3562e-02, PNorm = 70.1998, GNorm = 0.5395, lr_0 = 1.6502e-04
Loss = 4.5772e-02, PNorm = 70.2021, GNorm = 0.4599, lr_0 = 1.6490e-04
Loss = 3.6705e-02, PNorm = 70.2051, GNorm = 0.4075, lr_0 = 1.6479e-04
Loss = 3.9214e-02, PNorm = 70.2082, GNorm = 0.6545, lr_0 = 1.6468e-04
Loss = 4.3415e-02, PNorm = 70.2117, GNorm = 0.8294, lr_0 = 1.6457e-04
Loss = 4.6494e-02, PNorm = 70.2151, GNorm = 0.5002, lr_0 = 1.6445e-04
Loss = 4.0979e-02, PNorm = 70.2181, GNorm = 0.4554, lr_0 = 1.6434e-04
Loss = 4.1017e-02, PNorm = 70.2198, GNorm = 0.4762, lr_0 = 1.6423e-04
Loss = 3.8529e-02, PNorm = 70.2210, GNorm = 0.4182, lr_0 = 1.6412e-04
Loss = 4.1928e-02, PNorm = 70.2228, GNorm = 0.6683, lr_0 = 1.6400e-04
Loss = 4.9526e-02, PNorm = 70.2253, GNorm = 0.6689, lr_0 = 1.6389e-04
Loss = 4.7811e-02, PNorm = 70.2272, GNorm = 0.6033, lr_0 = 1.6378e-04
Validation mae = 0.385260
Epoch 24
Loss = 3.6906e-02, PNorm = 70.2288, GNorm = 0.4251, lr_0 = 1.6367e-04
Loss = 3.6458e-02, PNorm = 70.2326, GNorm = 0.5707, lr_0 = 1.6355e-04
Loss = 3.5118e-02, PNorm = 70.2367, GNorm = 0.4402, lr_0 = 1.6344e-04
Loss = 3.1340e-02, PNorm = 70.2391, GNorm = 0.2868, lr_0 = 1.6333e-04
Loss = 3.7401e-02, PNorm = 70.2403, GNorm = 0.4740, lr_0 = 1.6322e-04
Loss = 3.3132e-02, PNorm = 70.2430, GNorm = 0.4704, lr_0 = 1.6311e-04
Loss = 3.7678e-02, PNorm = 70.2452, GNorm = 1.0305, lr_0 = 1.6299e-04
Loss = 3.4091e-02, PNorm = 70.2488, GNorm = 0.3934, lr_0 = 1.6288e-04
Loss = 3.2180e-02, PNorm = 70.2517, GNorm = 0.3282, lr_0 = 1.6277e-04
Loss = 2.9151e-02, PNorm = 70.2545, GNorm = 0.5167, lr_0 = 1.6266e-04
Loss = 3.3595e-02, PNorm = 70.2564, GNorm = 0.4679, lr_0 = 1.6255e-04
Loss = 4.0509e-02, PNorm = 70.2589, GNorm = 0.4479, lr_0 = 1.6244e-04
Loss = 3.3737e-02, PNorm = 70.2616, GNorm = 0.4918, lr_0 = 1.6233e-04
Loss = 3.3899e-02, PNorm = 70.2639, GNorm = 0.4253, lr_0 = 1.6221e-04
Loss = 3.7592e-02, PNorm = 70.2664, GNorm = 0.8095, lr_0 = 1.6210e-04
Loss = 3.5473e-02, PNorm = 70.2695, GNorm = 0.4215, lr_0 = 1.6199e-04
Loss = 3.8052e-02, PNorm = 70.2723, GNorm = 0.3569, lr_0 = 1.6188e-04
Loss = 3.4437e-02, PNorm = 70.2748, GNorm = 0.4808, lr_0 = 1.6177e-04
Loss = 3.8895e-02, PNorm = 70.2761, GNorm = 0.5019, lr_0 = 1.6166e-04
Loss = 2.8000e-02, PNorm = 70.2773, GNorm = 0.4650, lr_0 = 1.6155e-04
Loss = 3.5598e-02, PNorm = 70.2792, GNorm = 0.4982, lr_0 = 1.6144e-04
Loss = 3.3726e-02, PNorm = 70.2805, GNorm = 0.4020, lr_0 = 1.6133e-04
Loss = 3.8237e-02, PNorm = 70.2830, GNorm = 0.7801, lr_0 = 1.6122e-04
Loss = 4.0416e-02, PNorm = 70.2860, GNorm = 0.5704, lr_0 = 1.6111e-04
Loss = 4.0544e-02, PNorm = 70.2900, GNorm = 0.5301, lr_0 = 1.6100e-04
Loss = 3.6913e-02, PNorm = 70.2926, GNorm = 0.5652, lr_0 = 1.6089e-04
Loss = 3.7000e-02, PNorm = 70.2943, GNorm = 0.4891, lr_0 = 1.6078e-04
Loss = 3.4488e-02, PNorm = 70.2961, GNorm = 0.3840, lr_0 = 1.6067e-04
Loss = 3.6465e-02, PNorm = 70.2984, GNorm = 0.4033, lr_0 = 1.6056e-04
Loss = 4.0373e-02, PNorm = 70.3009, GNorm = 0.5245, lr_0 = 1.6045e-04
Loss = 5.0837e-02, PNorm = 70.3033, GNorm = 0.5742, lr_0 = 1.6034e-04
Loss = 3.5556e-02, PNorm = 70.3058, GNorm = 0.6658, lr_0 = 1.6023e-04
Loss = 3.2479e-02, PNorm = 70.3077, GNorm = 0.3858, lr_0 = 1.6012e-04
Loss = 4.0087e-02, PNorm = 70.3084, GNorm = 0.4517, lr_0 = 1.6001e-04
Loss = 3.4376e-02, PNorm = 70.3087, GNorm = 0.4063, lr_0 = 1.5990e-04
Loss = 3.4620e-02, PNorm = 70.3095, GNorm = 0.4911, lr_0 = 1.5979e-04
Loss = 3.4364e-02, PNorm = 70.3110, GNorm = 0.7090, lr_0 = 1.5968e-04
Loss = 3.7286e-02, PNorm = 70.3137, GNorm = 0.4792, lr_0 = 1.5957e-04
Loss = 3.8150e-02, PNorm = 70.3165, GNorm = 0.5023, lr_0 = 1.5946e-04
Loss = 3.3332e-02, PNorm = 70.3180, GNorm = 0.5388, lr_0 = 1.5935e-04
Loss = 3.3442e-02, PNorm = 70.3196, GNorm = 0.5517, lr_0 = 1.5924e-04
Loss = 3.9977e-02, PNorm = 70.3241, GNorm = 0.5744, lr_0 = 1.5913e-04
Loss = 3.4219e-02, PNorm = 70.3291, GNorm = 0.3482, lr_0 = 1.5902e-04
Loss = 4.3657e-02, PNorm = 70.3317, GNorm = 0.9306, lr_0 = 1.5891e-04
Loss = 3.7440e-02, PNorm = 70.3335, GNorm = 0.5783, lr_0 = 1.5880e-04
Loss = 3.7948e-02, PNorm = 70.3346, GNorm = 0.4371, lr_0 = 1.5870e-04
Loss = 3.7715e-02, PNorm = 70.3356, GNorm = 0.5457, lr_0 = 1.5859e-04
Loss = 4.4762e-02, PNorm = 70.3387, GNorm = 0.4734, lr_0 = 1.5848e-04
Loss = 3.9575e-02, PNorm = 70.3418, GNorm = 0.3899, lr_0 = 1.5837e-04
Loss = 3.4415e-02, PNorm = 70.3441, GNorm = 0.5084, lr_0 = 1.5826e-04
Loss = 4.1756e-02, PNorm = 70.3469, GNorm = 0.4306, lr_0 = 1.5815e-04
Loss = 3.3582e-02, PNorm = 70.3498, GNorm = 0.4215, lr_0 = 1.5804e-04
Loss = 3.0305e-02, PNorm = 70.3505, GNorm = 0.4107, lr_0 = 1.5794e-04
Loss = 4.2028e-02, PNorm = 70.3515, GNorm = 0.5015, lr_0 = 1.5783e-04
Loss = 4.0966e-02, PNorm = 70.3545, GNorm = 0.7791, lr_0 = 1.5772e-04
Loss = 4.4046e-02, PNorm = 70.3578, GNorm = 0.3941, lr_0 = 1.5761e-04
Loss = 3.4305e-02, PNorm = 70.3602, GNorm = 0.4982, lr_0 = 1.5750e-04
Loss = 3.7517e-02, PNorm = 70.3613, GNorm = 0.7221, lr_0 = 1.5740e-04
Loss = 3.4094e-02, PNorm = 70.3622, GNorm = 0.4287, lr_0 = 1.5729e-04
Loss = 3.1761e-02, PNorm = 70.3640, GNorm = 0.4068, lr_0 = 1.5718e-04
Loss = 3.4850e-02, PNorm = 70.3668, GNorm = 0.4444, lr_0 = 1.5707e-04
Loss = 4.2996e-02, PNorm = 70.3703, GNorm = 0.6015, lr_0 = 1.5697e-04
Loss = 3.9115e-02, PNorm = 70.3741, GNorm = 0.4531, lr_0 = 1.5686e-04
Loss = 4.1925e-02, PNorm = 70.3766, GNorm = 0.7996, lr_0 = 1.5675e-04
Loss = 3.3414e-02, PNorm = 70.3792, GNorm = 0.5188, lr_0 = 1.5664e-04
Loss = 3.8776e-02, PNorm = 70.3817, GNorm = 0.4387, lr_0 = 1.5654e-04
Loss = 3.0843e-02, PNorm = 70.3849, GNorm = 0.6480, lr_0 = 1.5643e-04
Loss = 3.9281e-02, PNorm = 70.3862, GNorm = 0.4299, lr_0 = 1.5632e-04
Loss = 4.8897e-02, PNorm = 70.3896, GNorm = 0.8334, lr_0 = 1.5621e-04
Loss = 3.5590e-02, PNorm = 70.3926, GNorm = 0.6543, lr_0 = 1.5611e-04
Loss = 4.2631e-02, PNorm = 70.3944, GNorm = 0.7153, lr_0 = 1.5600e-04
Loss = 3.5799e-02, PNorm = 70.3967, GNorm = 0.5466, lr_0 = 1.5589e-04
Loss = 3.7162e-02, PNorm = 70.3999, GNorm = 0.4862, lr_0 = 1.5579e-04
Loss = 4.1193e-02, PNorm = 70.4027, GNorm = 0.6436, lr_0 = 1.5568e-04
Loss = 3.8206e-02, PNorm = 70.4047, GNorm = 0.7365, lr_0 = 1.5557e-04
Loss = 4.1726e-02, PNorm = 70.4070, GNorm = 0.5070, lr_0 = 1.5547e-04
Loss = 3.8193e-02, PNorm = 70.4088, GNorm = 0.4718, lr_0 = 1.5536e-04
Loss = 3.6482e-02, PNorm = 70.4108, GNorm = 0.6005, lr_0 = 1.5525e-04
Loss = 4.3678e-02, PNorm = 70.4142, GNorm = 0.5128, lr_0 = 1.5515e-04
Loss = 3.8161e-02, PNorm = 70.4169, GNorm = 0.4642, lr_0 = 1.5504e-04
Loss = 3.9158e-02, PNorm = 70.4191, GNorm = 0.4767, lr_0 = 1.5493e-04
Loss = 4.6872e-02, PNorm = 70.4237, GNorm = 0.6855, lr_0 = 1.5483e-04
Loss = 3.9178e-02, PNorm = 70.4265, GNorm = 0.4113, lr_0 = 1.5472e-04
Loss = 3.5056e-02, PNorm = 70.4279, GNorm = 0.5259, lr_0 = 1.5462e-04
Loss = 3.8465e-02, PNorm = 70.4313, GNorm = 0.4565, lr_0 = 1.5451e-04
Loss = 4.2442e-02, PNorm = 70.4340, GNorm = 0.6982, lr_0 = 1.5440e-04
Loss = 3.5591e-02, PNorm = 70.4350, GNorm = 0.7039, lr_0 = 1.5430e-04
Loss = 3.7268e-02, PNorm = 70.4369, GNorm = 0.5191, lr_0 = 1.5419e-04
Loss = 3.8114e-02, PNorm = 70.4396, GNorm = 0.4914, lr_0 = 1.5409e-04
Loss = 3.6385e-02, PNorm = 70.4415, GNorm = 0.4769, lr_0 = 1.5398e-04
Loss = 4.0559e-02, PNorm = 70.4430, GNorm = 0.4655, lr_0 = 1.5388e-04
Loss = 3.4883e-02, PNorm = 70.4440, GNorm = 0.5495, lr_0 = 1.5377e-04
Loss = 3.6653e-02, PNorm = 70.4453, GNorm = 0.3995, lr_0 = 1.5367e-04
Loss = 3.3114e-02, PNorm = 70.4481, GNorm = 0.4615, lr_0 = 1.5356e-04
Loss = 3.1029e-02, PNorm = 70.4501, GNorm = 0.5594, lr_0 = 1.5346e-04
Loss = 3.3408e-02, PNorm = 70.4505, GNorm = 0.7072, lr_0 = 1.5335e-04
Loss = 3.7631e-02, PNorm = 70.4531, GNorm = 0.4333, lr_0 = 1.5325e-04
Loss = 4.2526e-02, PNorm = 70.4564, GNorm = 0.5882, lr_0 = 1.5314e-04
Loss = 3.9195e-02, PNorm = 70.4591, GNorm = 0.6340, lr_0 = 1.5304e-04
Loss = 4.1527e-02, PNorm = 70.4609, GNorm = 0.5591, lr_0 = 1.5293e-04
Loss = 4.6949e-02, PNorm = 70.4628, GNorm = 0.4871, lr_0 = 1.5283e-04
Loss = 4.2189e-02, PNorm = 70.4645, GNorm = 0.5436, lr_0 = 1.5272e-04
Loss = 3.8823e-02, PNorm = 70.4662, GNorm = 0.5090, lr_0 = 1.5262e-04
Loss = 3.7699e-02, PNorm = 70.4677, GNorm = 0.4474, lr_0 = 1.5251e-04
Loss = 3.0173e-02, PNorm = 70.4693, GNorm = 0.5854, lr_0 = 1.5241e-04
Loss = 4.3336e-02, PNorm = 70.4713, GNorm = 0.4481, lr_0 = 1.5230e-04
Loss = 3.7099e-02, PNorm = 70.4745, GNorm = 0.5335, lr_0 = 1.5220e-04
Loss = 3.7587e-02, PNorm = 70.4762, GNorm = 0.5286, lr_0 = 1.5209e-04
Loss = 4.4088e-02, PNorm = 70.4789, GNorm = 0.6636, lr_0 = 1.5199e-04
Loss = 4.5855e-02, PNorm = 70.4810, GNorm = 0.5133, lr_0 = 1.5189e-04
Loss = 4.4747e-02, PNorm = 70.4840, GNorm = 0.5380, lr_0 = 1.5178e-04
Loss = 3.9727e-02, PNorm = 70.4861, GNorm = 0.4826, lr_0 = 1.5168e-04
Loss = 3.9056e-02, PNorm = 70.4877, GNorm = 0.4699, lr_0 = 1.5157e-04
Loss = 4.6604e-02, PNorm = 70.4893, GNorm = 0.5191, lr_0 = 1.5147e-04
Loss = 4.2051e-02, PNorm = 70.4922, GNorm = 0.3932, lr_0 = 1.5137e-04
Loss = 4.5112e-02, PNorm = 70.4951, GNorm = 0.4789, lr_0 = 1.5126e-04
Loss = 4.2300e-02, PNorm = 70.4965, GNorm = 0.8756, lr_0 = 1.5116e-04
Loss = 3.8866e-02, PNorm = 70.4985, GNorm = 0.5820, lr_0 = 1.5106e-04
Loss = 4.4039e-02, PNorm = 70.5014, GNorm = 0.3458, lr_0 = 1.5095e-04
Loss = 4.1022e-02, PNorm = 70.5025, GNorm = 0.4430, lr_0 = 1.5085e-04
Validation mae = 0.385943
Epoch 25
Loss = 3.2990e-02, PNorm = 70.5035, GNorm = 0.4488, lr_0 = 1.5075e-04
Loss = 3.2170e-02, PNorm = 70.5051, GNorm = 0.5831, lr_0 = 1.5064e-04
Loss = 3.4283e-02, PNorm = 70.5074, GNorm = 0.6600, lr_0 = 1.5054e-04
Loss = 2.8808e-02, PNorm = 70.5099, GNorm = 0.7316, lr_0 = 1.5044e-04
Loss = 3.0727e-02, PNorm = 70.5129, GNorm = 0.5167, lr_0 = 1.5033e-04
Loss = 3.3279e-02, PNorm = 70.5158, GNorm = 0.4177, lr_0 = 1.5023e-04
Loss = 3.5391e-02, PNorm = 70.5186, GNorm = 1.0872, lr_0 = 1.5013e-04
Loss = 3.4993e-02, PNorm = 70.5206, GNorm = 0.9190, lr_0 = 1.5002e-04
Loss = 3.3610e-02, PNorm = 70.5215, GNorm = 0.5556, lr_0 = 1.4992e-04
Loss = 3.5628e-02, PNorm = 70.5237, GNorm = 0.5333, lr_0 = 1.4982e-04
Loss = 3.2893e-02, PNorm = 70.5256, GNorm = 0.4738, lr_0 = 1.4972e-04
Loss = 3.1689e-02, PNorm = 70.5274, GNorm = 0.4525, lr_0 = 1.4961e-04
Loss = 2.9419e-02, PNorm = 70.5303, GNorm = 0.4629, lr_0 = 1.4951e-04
Loss = 3.1779e-02, PNorm = 70.5321, GNorm = 0.4238, lr_0 = 1.4941e-04
Loss = 3.8252e-02, PNorm = 70.5345, GNorm = 0.8343, lr_0 = 1.4931e-04
Loss = 3.4262e-02, PNorm = 70.5362, GNorm = 0.4253, lr_0 = 1.4920e-04
Loss = 3.6237e-02, PNorm = 70.5377, GNorm = 0.5149, lr_0 = 1.4910e-04
Loss = 3.5797e-02, PNorm = 70.5405, GNorm = 0.5031, lr_0 = 1.4900e-04
Loss = 3.4160e-02, PNorm = 70.5430, GNorm = 0.5796, lr_0 = 1.4890e-04
Loss = 3.1376e-02, PNorm = 70.5456, GNorm = 0.6697, lr_0 = 1.4880e-04
Loss = 3.0220e-02, PNorm = 70.5477, GNorm = 0.5647, lr_0 = 1.4869e-04
Loss = 3.2332e-02, PNorm = 70.5486, GNorm = 0.5619, lr_0 = 1.4859e-04
Loss = 3.1899e-02, PNorm = 70.5495, GNorm = 0.5564, lr_0 = 1.4849e-04
Loss = 3.6839e-02, PNorm = 70.5519, GNorm = 0.5125, lr_0 = 1.4839e-04
Loss = 2.9084e-02, PNorm = 70.5538, GNorm = 0.6699, lr_0 = 1.4829e-04
Loss = 3.4150e-02, PNorm = 70.5563, GNorm = 0.7650, lr_0 = 1.4818e-04
Loss = 3.1261e-02, PNorm = 70.5587, GNorm = 0.4397, lr_0 = 1.4808e-04
Loss = 3.4397e-02, PNorm = 70.5618, GNorm = 0.6847, lr_0 = 1.4798e-04
Loss = 3.6361e-02, PNorm = 70.5635, GNorm = 0.4098, lr_0 = 1.4788e-04
Loss = 3.6746e-02, PNorm = 70.5662, GNorm = 0.4722, lr_0 = 1.4778e-04
Loss = 3.2461e-02, PNorm = 70.5688, GNorm = 0.3703, lr_0 = 1.4768e-04
Loss = 3.3166e-02, PNorm = 70.5708, GNorm = 0.5892, lr_0 = 1.4758e-04
Loss = 3.8319e-02, PNorm = 70.5737, GNorm = 0.4560, lr_0 = 1.4748e-04
Loss = 3.6940e-02, PNorm = 70.5757, GNorm = 0.6895, lr_0 = 1.4737e-04
Loss = 3.4466e-02, PNorm = 70.5764, GNorm = 0.4537, lr_0 = 1.4727e-04
Loss = 3.7074e-02, PNorm = 70.5772, GNorm = 0.4842, lr_0 = 1.4717e-04
Loss = 3.1512e-02, PNorm = 70.5795, GNorm = 0.3754, lr_0 = 1.4707e-04
Loss = 3.0209e-02, PNorm = 70.5818, GNorm = 0.3615, lr_0 = 1.4697e-04
Loss = 3.3851e-02, PNorm = 70.5843, GNorm = 0.5086, lr_0 = 1.4687e-04
Loss = 3.1520e-02, PNorm = 70.5869, GNorm = 0.9369, lr_0 = 1.4677e-04
Loss = 3.9135e-02, PNorm = 70.5888, GNorm = 0.5385, lr_0 = 1.4667e-04
Loss = 3.8703e-02, PNorm = 70.5918, GNorm = 0.5183, lr_0 = 1.4657e-04
Loss = 3.4759e-02, PNorm = 70.5947, GNorm = 0.9259, lr_0 = 1.4647e-04
Loss = 3.4285e-02, PNorm = 70.5969, GNorm = 0.4837, lr_0 = 1.4637e-04
Loss = 3.9045e-02, PNorm = 70.5993, GNorm = 0.3669, lr_0 = 1.4627e-04
Loss = 3.4982e-02, PNorm = 70.6019, GNorm = 0.5914, lr_0 = 1.4617e-04
Loss = 3.7882e-02, PNorm = 70.6038, GNorm = 0.6143, lr_0 = 1.4607e-04
Loss = 3.2838e-02, PNorm = 70.6038, GNorm = 0.4270, lr_0 = 1.4597e-04
Loss = 3.4206e-02, PNorm = 70.6054, GNorm = 0.5967, lr_0 = 1.4587e-04
Loss = 3.6829e-02, PNorm = 70.6072, GNorm = 0.4317, lr_0 = 1.4577e-04
Loss = 3.6274e-02, PNorm = 70.6083, GNorm = 0.5197, lr_0 = 1.4567e-04
Loss = 3.7296e-02, PNorm = 70.6095, GNorm = 0.4602, lr_0 = 1.4557e-04
Loss = 3.6381e-02, PNorm = 70.6111, GNorm = 0.6411, lr_0 = 1.4547e-04
Loss = 3.8041e-02, PNorm = 70.6133, GNorm = 0.5773, lr_0 = 1.4537e-04
Loss = 4.0853e-02, PNorm = 70.6142, GNorm = 0.4104, lr_0 = 1.4527e-04
Loss = 3.8478e-02, PNorm = 70.6149, GNorm = 0.4947, lr_0 = 1.4517e-04
Loss = 3.6143e-02, PNorm = 70.6165, GNorm = 0.3756, lr_0 = 1.4507e-04
Loss = 3.3581e-02, PNorm = 70.6189, GNorm = 0.3954, lr_0 = 1.4497e-04
Loss = 3.2646e-02, PNorm = 70.6213, GNorm = 0.3045, lr_0 = 1.4487e-04
Loss = 3.6128e-02, PNorm = 70.6239, GNorm = 0.4271, lr_0 = 1.4477e-04
Loss = 3.8598e-02, PNorm = 70.6258, GNorm = 0.5307, lr_0 = 1.4467e-04
Loss = 4.4648e-02, PNorm = 70.6278, GNorm = 0.5197, lr_0 = 1.4457e-04
Loss = 3.4972e-02, PNorm = 70.6306, GNorm = 0.5301, lr_0 = 1.4447e-04
Loss = 3.6009e-02, PNorm = 70.6323, GNorm = 0.4445, lr_0 = 1.4438e-04
Loss = 3.6988e-02, PNorm = 70.6339, GNorm = 0.7151, lr_0 = 1.4428e-04
Loss = 3.5321e-02, PNorm = 70.6354, GNorm = 0.7431, lr_0 = 1.4418e-04
Loss = 3.4420e-02, PNorm = 70.6367, GNorm = 0.5259, lr_0 = 1.4408e-04
Loss = 3.7320e-02, PNorm = 70.6400, GNorm = 0.5984, lr_0 = 1.4398e-04
Loss = 3.3846e-02, PNorm = 70.6428, GNorm = 0.4322, lr_0 = 1.4388e-04
Loss = 3.6317e-02, PNorm = 70.6456, GNorm = 0.6457, lr_0 = 1.4378e-04
Loss = 3.7875e-02, PNorm = 70.6487, GNorm = 0.6935, lr_0 = 1.4368e-04
Loss = 4.1748e-02, PNorm = 70.6512, GNorm = 0.6006, lr_0 = 1.4359e-04
Loss = 3.8902e-02, PNorm = 70.6535, GNorm = 0.5510, lr_0 = 1.4349e-04
Loss = 3.1948e-02, PNorm = 70.6560, GNorm = 0.5071, lr_0 = 1.4339e-04
Loss = 3.1523e-02, PNorm = 70.6572, GNorm = 0.5258, lr_0 = 1.4329e-04
Loss = 3.4561e-02, PNorm = 70.6590, GNorm = 0.4250, lr_0 = 1.4319e-04
Loss = 3.6204e-02, PNorm = 70.6602, GNorm = 0.4019, lr_0 = 1.4310e-04
Loss = 3.2451e-02, PNorm = 70.6614, GNorm = 0.7256, lr_0 = 1.4300e-04
Loss = 3.9089e-02, PNorm = 70.6638, GNorm = 0.4520, lr_0 = 1.4290e-04
Loss = 4.0484e-02, PNorm = 70.6653, GNorm = 0.5761, lr_0 = 1.4280e-04
Loss = 3.9333e-02, PNorm = 70.6663, GNorm = 0.5535, lr_0 = 1.4270e-04
Loss = 4.5009e-02, PNorm = 70.6682, GNorm = 0.6046, lr_0 = 1.4261e-04
Loss = 3.8838e-02, PNorm = 70.6713, GNorm = 0.4491, lr_0 = 1.4251e-04
Loss = 3.6279e-02, PNorm = 70.6748, GNorm = 0.4515, lr_0 = 1.4241e-04
Loss = 4.0192e-02, PNorm = 70.6774, GNorm = 0.4693, lr_0 = 1.4231e-04
Loss = 4.7380e-02, PNorm = 70.6796, GNorm = 0.5873, lr_0 = 1.4222e-04
Loss = 4.0102e-02, PNorm = 70.6816, GNorm = 0.5845, lr_0 = 1.4212e-04
Loss = 4.2468e-02, PNorm = 70.6839, GNorm = 0.5189, lr_0 = 1.4202e-04
Loss = 3.8897e-02, PNorm = 70.6850, GNorm = 0.7559, lr_0 = 1.4192e-04
Loss = 3.8850e-02, PNorm = 70.6858, GNorm = 0.4731, lr_0 = 1.4183e-04
Loss = 4.1840e-02, PNorm = 70.6875, GNorm = 0.8487, lr_0 = 1.4173e-04
Loss = 3.9638e-02, PNorm = 70.6910, GNorm = 0.4630, lr_0 = 1.4163e-04
Loss = 4.2875e-02, PNorm = 70.6935, GNorm = 0.5821, lr_0 = 1.4153e-04
Loss = 3.4463e-02, PNorm = 70.6954, GNorm = 0.4649, lr_0 = 1.4144e-04
Loss = 4.2011e-02, PNorm = 70.6978, GNorm = 0.4929, lr_0 = 1.4134e-04
Loss = 3.3717e-02, PNorm = 70.6999, GNorm = 0.4149, lr_0 = 1.4124e-04
Loss = 3.9666e-02, PNorm = 70.7012, GNorm = 0.4905, lr_0 = 1.4115e-04
Loss = 4.4289e-02, PNorm = 70.7023, GNorm = 0.6353, lr_0 = 1.4105e-04
Loss = 4.0267e-02, PNorm = 70.7038, GNorm = 0.4247, lr_0 = 1.4095e-04
Loss = 3.9868e-02, PNorm = 70.7065, GNorm = 0.3883, lr_0 = 1.4086e-04
Loss = 3.2711e-02, PNorm = 70.7088, GNorm = 0.4290, lr_0 = 1.4076e-04
Loss = 3.7740e-02, PNorm = 70.7112, GNorm = 0.5195, lr_0 = 1.4066e-04
Loss = 4.2785e-02, PNorm = 70.7143, GNorm = 0.4587, lr_0 = 1.4057e-04
Loss = 2.6954e-02, PNorm = 70.7167, GNorm = 0.4578, lr_0 = 1.4047e-04
Loss = 3.4779e-02, PNorm = 70.7182, GNorm = 0.4386, lr_0 = 1.4038e-04
Loss = 3.4740e-02, PNorm = 70.7196, GNorm = 0.4258, lr_0 = 1.4028e-04
Loss = 4.0906e-02, PNorm = 70.7218, GNorm = 0.6846, lr_0 = 1.4018e-04
Loss = 3.3804e-02, PNorm = 70.7236, GNorm = 0.4114, lr_0 = 1.4009e-04
Loss = 3.3390e-02, PNorm = 70.7247, GNorm = 0.4788, lr_0 = 1.3999e-04
Loss = 3.3830e-02, PNorm = 70.7265, GNorm = 0.5286, lr_0 = 1.3990e-04
Loss = 3.4550e-02, PNorm = 70.7283, GNorm = 0.4630, lr_0 = 1.3980e-04
Loss = 3.8254e-02, PNorm = 70.7308, GNorm = 0.8140, lr_0 = 1.3970e-04
Loss = 3.1822e-02, PNorm = 70.7325, GNorm = 0.5481, lr_0 = 1.3961e-04
Loss = 4.2977e-02, PNorm = 70.7348, GNorm = 0.4568, lr_0 = 1.3951e-04
Loss = 3.1889e-02, PNorm = 70.7369, GNorm = 0.4615, lr_0 = 1.3942e-04
Loss = 4.4824e-02, PNorm = 70.7384, GNorm = 0.5898, lr_0 = 1.3932e-04
Loss = 3.8656e-02, PNorm = 70.7400, GNorm = 0.4577, lr_0 = 1.3923e-04
Loss = 3.6981e-02, PNorm = 70.7420, GNorm = 0.7047, lr_0 = 1.3913e-04
Loss = 4.0074e-02, PNorm = 70.7449, GNorm = 0.3858, lr_0 = 1.3904e-04
Loss = 3.7544e-02, PNorm = 70.7466, GNorm = 0.5191, lr_0 = 1.3894e-04
Validation mae = 0.387117
Epoch 26
Loss = 3.3933e-02, PNorm = 70.7494, GNorm = 0.9247, lr_0 = 1.3884e-04
Loss = 2.9026e-02, PNorm = 70.7522, GNorm = 0.7475, lr_0 = 1.3875e-04
Loss = 3.3837e-02, PNorm = 70.7547, GNorm = 0.3915, lr_0 = 1.3865e-04
Loss = 3.6606e-02, PNorm = 70.7569, GNorm = 0.4619, lr_0 = 1.3856e-04
Loss = 3.0053e-02, PNorm = 70.7603, GNorm = 0.4452, lr_0 = 1.3846e-04
Loss = 3.5942e-02, PNorm = 70.7639, GNorm = 0.7694, lr_0 = 1.3837e-04
Loss = 3.7450e-02, PNorm = 70.7658, GNorm = 0.4959, lr_0 = 1.3828e-04
Loss = 3.5028e-02, PNorm = 70.7677, GNorm = 0.6546, lr_0 = 1.3818e-04
Loss = 3.0691e-02, PNorm = 70.7693, GNorm = 0.4266, lr_0 = 1.3809e-04
Loss = 4.0097e-02, PNorm = 70.7708, GNorm = 0.7596, lr_0 = 1.3799e-04
Loss = 3.2197e-02, PNorm = 70.7737, GNorm = 0.4487, lr_0 = 1.3790e-04
Loss = 3.1606e-02, PNorm = 70.7769, GNorm = 0.3723, lr_0 = 1.3780e-04
Loss = 3.1966e-02, PNorm = 70.7803, GNorm = 0.7208, lr_0 = 1.3771e-04
Loss = 2.9885e-02, PNorm = 70.7824, GNorm = 0.4605, lr_0 = 1.3761e-04
Loss = 2.8385e-02, PNorm = 70.7841, GNorm = 1.0437, lr_0 = 1.3752e-04
Loss = 3.3735e-02, PNorm = 70.7854, GNorm = 0.4587, lr_0 = 1.3742e-04
Loss = 3.7712e-02, PNorm = 70.7878, GNorm = 0.6446, lr_0 = 1.3733e-04
Loss = 3.3201e-02, PNorm = 70.7899, GNorm = 0.3971, lr_0 = 1.3724e-04
Loss = 3.3549e-02, PNorm = 70.7933, GNorm = 0.4969, lr_0 = 1.3714e-04
Loss = 3.4927e-02, PNorm = 70.7959, GNorm = 0.7709, lr_0 = 1.3705e-04
Loss = 3.1275e-02, PNorm = 70.7972, GNorm = 0.6758, lr_0 = 1.3695e-04
Loss = 4.1079e-02, PNorm = 70.7986, GNorm = 0.4579, lr_0 = 1.3686e-04
Loss = 3.3149e-02, PNorm = 70.7998, GNorm = 0.6049, lr_0 = 1.3677e-04
Loss = 3.7752e-02, PNorm = 70.8015, GNorm = 0.5740, lr_0 = 1.3667e-04
Loss = 3.2765e-02, PNorm = 70.8039, GNorm = 0.4613, lr_0 = 1.3658e-04
Loss = 3.5145e-02, PNorm = 70.8058, GNorm = 0.5522, lr_0 = 1.3649e-04
Loss = 3.7361e-02, PNorm = 70.8072, GNorm = 0.5390, lr_0 = 1.3639e-04
Loss = 3.4033e-02, PNorm = 70.8096, GNorm = 0.4309, lr_0 = 1.3630e-04
Loss = 3.4367e-02, PNorm = 70.8124, GNorm = 0.3992, lr_0 = 1.3621e-04
Loss = 4.3264e-02, PNorm = 70.8149, GNorm = 0.4715, lr_0 = 1.3611e-04
Loss = 2.8858e-02, PNorm = 70.8173, GNorm = 0.4545, lr_0 = 1.3602e-04
Loss = 3.5714e-02, PNorm = 70.8187, GNorm = 0.3977, lr_0 = 1.3593e-04
Loss = 3.6940e-02, PNorm = 70.8205, GNorm = 0.4732, lr_0 = 1.3583e-04
Loss = 3.1105e-02, PNorm = 70.8216, GNorm = 0.5810, lr_0 = 1.3574e-04
Loss = 3.5929e-02, PNorm = 70.8232, GNorm = 0.4411, lr_0 = 1.3565e-04
Loss = 3.0677e-02, PNorm = 70.8245, GNorm = 0.4343, lr_0 = 1.3555e-04
Loss = 3.0732e-02, PNorm = 70.8265, GNorm = 0.4359, lr_0 = 1.3546e-04
Loss = 3.1645e-02, PNorm = 70.8280, GNorm = 0.7292, lr_0 = 1.3537e-04
Loss = 3.2346e-02, PNorm = 70.8289, GNorm = 0.4260, lr_0 = 1.3528e-04
Loss = 3.0901e-02, PNorm = 70.8299, GNorm = 0.4070, lr_0 = 1.3518e-04
Loss = 3.2999e-02, PNorm = 70.8321, GNorm = 0.3570, lr_0 = 1.3509e-04
Loss = 3.4156e-02, PNorm = 70.8343, GNorm = 0.9456, lr_0 = 1.3500e-04
Loss = 3.6820e-02, PNorm = 70.8354, GNorm = 0.5810, lr_0 = 1.3491e-04
Loss = 3.4393e-02, PNorm = 70.8372, GNorm = 0.3802, lr_0 = 1.3481e-04
Loss = 3.0894e-02, PNorm = 70.8393, GNorm = 0.3875, lr_0 = 1.3472e-04
Loss = 2.8519e-02, PNorm = 70.8410, GNorm = 0.5329, lr_0 = 1.3463e-04
Loss = 3.2710e-02, PNorm = 70.8430, GNorm = 0.4611, lr_0 = 1.3454e-04
Loss = 3.1100e-02, PNorm = 70.8452, GNorm = 0.4696, lr_0 = 1.3444e-04
Loss = 3.2008e-02, PNorm = 70.8476, GNorm = 0.5738, lr_0 = 1.3435e-04
Loss = 3.9259e-02, PNorm = 70.8501, GNorm = 0.7856, lr_0 = 1.3426e-04
Loss = 3.2539e-02, PNorm = 70.8514, GNorm = 0.4802, lr_0 = 1.3417e-04
Loss = 3.2939e-02, PNorm = 70.8528, GNorm = 0.6792, lr_0 = 1.3408e-04
Loss = 3.6090e-02, PNorm = 70.8552, GNorm = 0.5576, lr_0 = 1.3398e-04
Loss = 3.3096e-02, PNorm = 70.8569, GNorm = 0.5136, lr_0 = 1.3389e-04
Loss = 3.5686e-02, PNorm = 70.8597, GNorm = 0.4072, lr_0 = 1.3380e-04
Loss = 3.2297e-02, PNorm = 70.8617, GNorm = 0.5535, lr_0 = 1.3371e-04
Loss = 3.5363e-02, PNorm = 70.8641, GNorm = 0.4406, lr_0 = 1.3362e-04
Loss = 3.0563e-02, PNorm = 70.8663, GNorm = 0.5359, lr_0 = 1.3353e-04
Loss = 3.1779e-02, PNorm = 70.8672, GNorm = 0.4989, lr_0 = 1.3343e-04
Loss = 3.8567e-02, PNorm = 70.8683, GNorm = 0.5440, lr_0 = 1.3334e-04
Loss = 3.5953e-02, PNorm = 70.8706, GNorm = 0.4084, lr_0 = 1.3325e-04
Loss = 3.1153e-02, PNorm = 70.8736, GNorm = 0.4448, lr_0 = 1.3316e-04
Loss = 3.3308e-02, PNorm = 70.8740, GNorm = 0.3945, lr_0 = 1.3307e-04
Loss = 3.4562e-02, PNorm = 70.8757, GNorm = 0.4272, lr_0 = 1.3298e-04
Loss = 3.6299e-02, PNorm = 70.8778, GNorm = 0.6164, lr_0 = 1.3289e-04
Loss = 3.6441e-02, PNorm = 70.8798, GNorm = 0.5047, lr_0 = 1.3280e-04
Loss = 3.3855e-02, PNorm = 70.8816, GNorm = 0.4815, lr_0 = 1.3270e-04
Loss = 3.4304e-02, PNorm = 70.8839, GNorm = 0.6734, lr_0 = 1.3261e-04
Loss = 3.2898e-02, PNorm = 70.8858, GNorm = 0.4353, lr_0 = 1.3252e-04
Loss = 4.2889e-02, PNorm = 70.8872, GNorm = 0.5035, lr_0 = 1.3243e-04
Loss = 3.5643e-02, PNorm = 70.8890, GNorm = 0.5416, lr_0 = 1.3234e-04
Loss = 3.3738e-02, PNorm = 70.8913, GNorm = 0.5001, lr_0 = 1.3225e-04
Loss = 3.4279e-02, PNorm = 70.8926, GNorm = 0.4778, lr_0 = 1.3216e-04
Loss = 2.8486e-02, PNorm = 70.8943, GNorm = 0.3503, lr_0 = 1.3207e-04
Loss = 3.5687e-02, PNorm = 70.8955, GNorm = 0.4921, lr_0 = 1.3198e-04
Loss = 3.2367e-02, PNorm = 70.8962, GNorm = 0.5184, lr_0 = 1.3189e-04
Loss = 3.5415e-02, PNorm = 70.8971, GNorm = 0.4925, lr_0 = 1.3180e-04
Loss = 3.6298e-02, PNorm = 70.8990, GNorm = 0.5259, lr_0 = 1.3171e-04
Loss = 3.0994e-02, PNorm = 70.9005, GNorm = 0.5419, lr_0 = 1.3162e-04
Loss = 3.6700e-02, PNorm = 70.9025, GNorm = 0.6288, lr_0 = 1.3153e-04
Loss = 3.4103e-02, PNorm = 70.9045, GNorm = 0.4076, lr_0 = 1.3144e-04
Loss = 3.5305e-02, PNorm = 70.9050, GNorm = 0.6387, lr_0 = 1.3135e-04
Loss = 3.4795e-02, PNorm = 70.9063, GNorm = 0.5115, lr_0 = 1.3126e-04
Loss = 3.2625e-02, PNorm = 70.9075, GNorm = 0.4492, lr_0 = 1.3117e-04
Loss = 3.3190e-02, PNorm = 70.9090, GNorm = 0.4484, lr_0 = 1.3108e-04
Loss = 3.4084e-02, PNorm = 70.9097, GNorm = 0.4848, lr_0 = 1.3099e-04
Loss = 3.6292e-02, PNorm = 70.9108, GNorm = 0.8342, lr_0 = 1.3090e-04
Loss = 3.4772e-02, PNorm = 70.9123, GNorm = 0.5489, lr_0 = 1.3081e-04
Loss = 3.2656e-02, PNorm = 70.9140, GNorm = 0.5912, lr_0 = 1.3072e-04
Loss = 4.2434e-02, PNorm = 70.9164, GNorm = 0.5103, lr_0 = 1.3063e-04
Loss = 3.3081e-02, PNorm = 70.9189, GNorm = 0.6833, lr_0 = 1.3054e-04
Loss = 3.0997e-02, PNorm = 70.9211, GNorm = 0.4006, lr_0 = 1.3045e-04
Loss = 3.5685e-02, PNorm = 70.9231, GNorm = 0.5009, lr_0 = 1.3036e-04
Loss = 3.4098e-02, PNorm = 70.9245, GNorm = 0.3934, lr_0 = 1.3027e-04
Loss = 3.9550e-02, PNorm = 70.9253, GNorm = 0.4958, lr_0 = 1.3018e-04
Loss = 3.1684e-02, PNorm = 70.9267, GNorm = 0.5224, lr_0 = 1.3009e-04
Loss = 3.3758e-02, PNorm = 70.9283, GNorm = 0.4870, lr_0 = 1.3000e-04
Loss = 3.1491e-02, PNorm = 70.9298, GNorm = 0.4359, lr_0 = 1.2992e-04
Loss = 3.0035e-02, PNorm = 70.9316, GNorm = 0.4062, lr_0 = 1.2983e-04
Loss = 3.0672e-02, PNorm = 70.9323, GNorm = 0.3793, lr_0 = 1.2974e-04
Loss = 3.1652e-02, PNorm = 70.9344, GNorm = 0.4226, lr_0 = 1.2965e-04
Loss = 3.7639e-02, PNorm = 70.9370, GNorm = 0.6125, lr_0 = 1.2956e-04
Loss = 3.8175e-02, PNorm = 70.9392, GNorm = 0.6180, lr_0 = 1.2947e-04
Loss = 4.2413e-02, PNorm = 70.9430, GNorm = 0.6437, lr_0 = 1.2938e-04
Loss = 4.0558e-02, PNorm = 70.9450, GNorm = 0.5162, lr_0 = 1.2929e-04
Loss = 3.9228e-02, PNorm = 70.9466, GNorm = 0.5095, lr_0 = 1.2921e-04
Loss = 3.7081e-02, PNorm = 70.9488, GNorm = 0.5159, lr_0 = 1.2912e-04
Loss = 3.0737e-02, PNorm = 70.9507, GNorm = 0.4642, lr_0 = 1.2903e-04
Loss = 3.7632e-02, PNorm = 70.9514, GNorm = 0.4706, lr_0 = 1.2894e-04
Loss = 4.3306e-02, PNorm = 70.9536, GNorm = 0.6398, lr_0 = 1.2885e-04
Loss = 2.9993e-02, PNorm = 70.9555, GNorm = 0.4355, lr_0 = 1.2876e-04
Loss = 3.6909e-02, PNorm = 70.9582, GNorm = 0.4568, lr_0 = 1.2867e-04
Loss = 3.4483e-02, PNorm = 70.9607, GNorm = 0.5374, lr_0 = 1.2859e-04
Loss = 4.1067e-02, PNorm = 70.9628, GNorm = 0.5199, lr_0 = 1.2850e-04
Loss = 3.4279e-02, PNorm = 70.9646, GNorm = 0.5490, lr_0 = 1.2841e-04
Loss = 3.0407e-02, PNorm = 70.9663, GNorm = 0.3974, lr_0 = 1.2832e-04
Loss = 3.0975e-02, PNorm = 70.9674, GNorm = 0.5195, lr_0 = 1.2823e-04
Loss = 3.1940e-02, PNorm = 70.9687, GNorm = 0.4111, lr_0 = 1.2815e-04
Loss = 3.6537e-02, PNorm = 70.9703, GNorm = 0.5800, lr_0 = 1.2806e-04
Loss = 4.1837e-02, PNorm = 70.9723, GNorm = 0.6178, lr_0 = 1.2797e-04
Validation mae = 0.404119
Epoch 27
Loss = 3.1745e-02, PNorm = 70.9744, GNorm = 0.7100, lr_0 = 1.2788e-04
Loss = 3.1764e-02, PNorm = 70.9756, GNorm = 0.3449, lr_0 = 1.2780e-04
Loss = 2.9385e-02, PNorm = 70.9768, GNorm = 0.5377, lr_0 = 1.2771e-04
Loss = 3.3032e-02, PNorm = 70.9781, GNorm = 0.5969, lr_0 = 1.2762e-04
Loss = 2.8447e-02, PNorm = 70.9803, GNorm = 0.5123, lr_0 = 1.2753e-04
Loss = 3.4168e-02, PNorm = 70.9815, GNorm = 0.7866, lr_0 = 1.2745e-04
Loss = 3.4906e-02, PNorm = 70.9843, GNorm = 0.4265, lr_0 = 1.2736e-04
Loss = 3.1036e-02, PNorm = 70.9850, GNorm = 0.4432, lr_0 = 1.2727e-04
Loss = 3.4922e-02, PNorm = 70.9865, GNorm = 0.4857, lr_0 = 1.2718e-04
Loss = 3.0432e-02, PNorm = 70.9894, GNorm = 0.5818, lr_0 = 1.2710e-04
Loss = 3.0738e-02, PNorm = 70.9909, GNorm = 0.6159, lr_0 = 1.2701e-04
Loss = 3.8210e-02, PNorm = 70.9921, GNorm = 0.9253, lr_0 = 1.2692e-04
Loss = 3.2143e-02, PNorm = 70.9937, GNorm = 0.3804, lr_0 = 1.2684e-04
Loss = 3.0531e-02, PNorm = 70.9964, GNorm = 0.5509, lr_0 = 1.2675e-04
Loss = 3.4065e-02, PNorm = 70.9991, GNorm = 0.4553, lr_0 = 1.2666e-04
Loss = 3.5876e-02, PNorm = 71.0013, GNorm = 0.4430, lr_0 = 1.2658e-04
Loss = 3.0561e-02, PNorm = 71.0036, GNorm = 0.5367, lr_0 = 1.2649e-04
Loss = 3.0169e-02, PNorm = 71.0051, GNorm = 0.6450, lr_0 = 1.2640e-04
Loss = 3.3509e-02, PNorm = 71.0075, GNorm = 0.4373, lr_0 = 1.2632e-04
Loss = 3.6019e-02, PNorm = 71.0100, GNorm = 0.5170, lr_0 = 1.2623e-04
Loss = 3.1846e-02, PNorm = 71.0113, GNorm = 0.4576, lr_0 = 1.2614e-04
Loss = 3.3336e-02, PNorm = 71.0129, GNorm = 0.4017, lr_0 = 1.2606e-04
Loss = 2.9814e-02, PNorm = 71.0144, GNorm = 0.6492, lr_0 = 1.2597e-04
Loss = 3.4052e-02, PNorm = 71.0160, GNorm = 0.4609, lr_0 = 1.2588e-04
Loss = 3.2440e-02, PNorm = 71.0181, GNorm = 0.3972, lr_0 = 1.2580e-04
Loss = 3.2629e-02, PNorm = 71.0194, GNorm = 0.5773, lr_0 = 1.2571e-04
Loss = 3.1391e-02, PNorm = 71.0203, GNorm = 0.3045, lr_0 = 1.2563e-04
Loss = 3.0749e-02, PNorm = 71.0219, GNorm = 0.5050, lr_0 = 1.2554e-04
Loss = 3.1375e-02, PNorm = 71.0240, GNorm = 0.4633, lr_0 = 1.2545e-04
Loss = 3.1441e-02, PNorm = 71.0259, GNorm = 0.4277, lr_0 = 1.2537e-04
Loss = 3.2628e-02, PNorm = 71.0263, GNorm = 0.4208, lr_0 = 1.2528e-04
Loss = 2.7092e-02, PNorm = 71.0276, GNorm = 0.5390, lr_0 = 1.2520e-04
Loss = 3.1919e-02, PNorm = 71.0290, GNorm = 0.4710, lr_0 = 1.2511e-04
Loss = 3.0142e-02, PNorm = 71.0301, GNorm = 0.7639, lr_0 = 1.2502e-04
Loss = 2.8915e-02, PNorm = 71.0312, GNorm = 0.4405, lr_0 = 1.2494e-04
Loss = 3.5932e-02, PNorm = 71.0322, GNorm = 0.7614, lr_0 = 1.2485e-04
Loss = 4.0855e-02, PNorm = 71.0336, GNorm = 0.5849, lr_0 = 1.2477e-04
Loss = 3.0124e-02, PNorm = 71.0359, GNorm = 0.6536, lr_0 = 1.2468e-04
Loss = 3.2819e-02, PNorm = 71.0379, GNorm = 0.4227, lr_0 = 1.2460e-04
Loss = 3.7794e-02, PNorm = 71.0406, GNorm = 0.3794, lr_0 = 1.2451e-04
Loss = 3.1082e-02, PNorm = 71.0422, GNorm = 0.3785, lr_0 = 1.2443e-04
Loss = 3.1699e-02, PNorm = 71.0435, GNorm = 0.4661, lr_0 = 1.2434e-04
Loss = 3.2752e-02, PNorm = 71.0441, GNorm = 0.4491, lr_0 = 1.2426e-04
Loss = 3.0534e-02, PNorm = 71.0457, GNorm = 0.5300, lr_0 = 1.2417e-04
Loss = 3.0609e-02, PNorm = 71.0474, GNorm = 0.4125, lr_0 = 1.2409e-04
Loss = 2.8407e-02, PNorm = 71.0487, GNorm = 0.3273, lr_0 = 1.2400e-04
Loss = 3.1165e-02, PNorm = 71.0504, GNorm = 0.4182, lr_0 = 1.2392e-04
Loss = 2.9634e-02, PNorm = 71.0531, GNorm = 0.5343, lr_0 = 1.2383e-04
Loss = 3.3486e-02, PNorm = 71.0552, GNorm = 0.4544, lr_0 = 1.2375e-04
Loss = 3.2720e-02, PNorm = 71.0569, GNorm = 0.6030, lr_0 = 1.2366e-04
Loss = 3.9572e-02, PNorm = 71.0583, GNorm = 0.5693, lr_0 = 1.2358e-04
Loss = 3.0097e-02, PNorm = 71.0586, GNorm = 0.4408, lr_0 = 1.2349e-04
Loss = 2.9066e-02, PNorm = 71.0595, GNorm = 0.4944, lr_0 = 1.2341e-04
Loss = 2.8815e-02, PNorm = 71.0604, GNorm = 0.4754, lr_0 = 1.2332e-04
Loss = 3.9181e-02, PNorm = 71.0618, GNorm = 0.5568, lr_0 = 1.2324e-04
Loss = 3.1988e-02, PNorm = 71.0637, GNorm = 0.4053, lr_0 = 1.2315e-04
Loss = 3.0968e-02, PNorm = 71.0663, GNorm = 0.5243, lr_0 = 1.2307e-04
Loss = 3.0197e-02, PNorm = 71.0679, GNorm = 0.3982, lr_0 = 1.2298e-04
Loss = 3.6415e-02, PNorm = 71.0689, GNorm = 0.4658, lr_0 = 1.2290e-04
Loss = 2.9815e-02, PNorm = 71.0699, GNorm = 0.5129, lr_0 = 1.2282e-04
Loss = 2.8764e-02, PNorm = 71.0705, GNorm = 0.3607, lr_0 = 1.2273e-04
Loss = 3.1693e-02, PNorm = 71.0710, GNorm = 0.4406, lr_0 = 1.2265e-04
Loss = 3.1979e-02, PNorm = 71.0726, GNorm = 0.4564, lr_0 = 1.2256e-04
Loss = 3.2466e-02, PNorm = 71.0734, GNorm = 0.4383, lr_0 = 1.2248e-04
Loss = 3.0311e-02, PNorm = 71.0752, GNorm = 0.6519, lr_0 = 1.2240e-04
Loss = 3.7106e-02, PNorm = 71.0772, GNorm = 0.4488, lr_0 = 1.2231e-04
Loss = 4.1329e-02, PNorm = 71.0799, GNorm = 0.4743, lr_0 = 1.2223e-04
Loss = 3.5020e-02, PNorm = 71.0820, GNorm = 0.5885, lr_0 = 1.2214e-04
Loss = 2.7267e-02, PNorm = 71.0841, GNorm = 0.5108, lr_0 = 1.2206e-04
Loss = 3.1888e-02, PNorm = 71.0863, GNorm = 0.3681, lr_0 = 1.2198e-04
Loss = 3.3894e-02, PNorm = 71.0887, GNorm = 0.5858, lr_0 = 1.2189e-04
Loss = 3.4598e-02, PNorm = 71.0904, GNorm = 0.6025, lr_0 = 1.2181e-04
Loss = 3.2325e-02, PNorm = 71.0914, GNorm = 0.4531, lr_0 = 1.2173e-04
Loss = 3.6930e-02, PNorm = 71.0934, GNorm = 0.5386, lr_0 = 1.2164e-04
Loss = 2.9503e-02, PNorm = 71.0954, GNorm = 0.4039, lr_0 = 1.2156e-04
Loss = 3.0127e-02, PNorm = 71.0973, GNorm = 0.4207, lr_0 = 1.2148e-04
Loss = 3.5205e-02, PNorm = 71.0993, GNorm = 0.7422, lr_0 = 1.2139e-04
Loss = 3.9516e-02, PNorm = 71.1012, GNorm = 0.4569, lr_0 = 1.2131e-04
Loss = 3.4200e-02, PNorm = 71.1033, GNorm = 0.5313, lr_0 = 1.2123e-04
Loss = 3.3201e-02, PNorm = 71.1059, GNorm = 0.4961, lr_0 = 1.2114e-04
Loss = 3.3317e-02, PNorm = 71.1074, GNorm = 0.4446, lr_0 = 1.2106e-04
Loss = 3.0499e-02, PNorm = 71.1091, GNorm = 0.6214, lr_0 = 1.2098e-04
Loss = 3.1199e-02, PNorm = 71.1107, GNorm = 0.5687, lr_0 = 1.2090e-04
Loss = 3.4337e-02, PNorm = 71.1120, GNorm = 0.4730, lr_0 = 1.2081e-04
Loss = 3.5409e-02, PNorm = 71.1141, GNorm = 0.4996, lr_0 = 1.2073e-04
Loss = 3.5955e-02, PNorm = 71.1164, GNorm = 0.6106, lr_0 = 1.2065e-04
Loss = 3.8913e-02, PNorm = 71.1182, GNorm = 0.5648, lr_0 = 1.2056e-04
Loss = 4.3223e-02, PNorm = 71.1194, GNorm = 0.7388, lr_0 = 1.2048e-04
Loss = 3.1969e-02, PNorm = 71.1217, GNorm = 0.4442, lr_0 = 1.2040e-04
Loss = 3.0413e-02, PNorm = 71.1231, GNorm = 0.4585, lr_0 = 1.2032e-04
Loss = 3.2515e-02, PNorm = 71.1238, GNorm = 0.3975, lr_0 = 1.2023e-04
Loss = 3.4354e-02, PNorm = 71.1251, GNorm = 0.4393, lr_0 = 1.2015e-04
Loss = 3.1872e-02, PNorm = 71.1261, GNorm = 0.4395, lr_0 = 1.2007e-04
Loss = 3.6041e-02, PNorm = 71.1271, GNorm = 0.6878, lr_0 = 1.1999e-04
Loss = 3.2619e-02, PNorm = 71.1280, GNorm = 0.4272, lr_0 = 1.1991e-04
Loss = 3.1291e-02, PNorm = 71.1298, GNorm = 0.3774, lr_0 = 1.1982e-04
Loss = 3.6138e-02, PNorm = 71.1331, GNorm = 0.6471, lr_0 = 1.1974e-04
Loss = 3.8211e-02, PNorm = 71.1357, GNorm = 0.4209, lr_0 = 1.1966e-04
Loss = 2.9057e-02, PNorm = 71.1367, GNorm = 0.4325, lr_0 = 1.1958e-04
Loss = 3.0404e-02, PNorm = 71.1383, GNorm = 0.4471, lr_0 = 1.1950e-04
Loss = 3.8794e-02, PNorm = 71.1410, GNorm = 0.4928, lr_0 = 1.1941e-04
Loss = 3.0715e-02, PNorm = 71.1442, GNorm = 0.4371, lr_0 = 1.1933e-04
Loss = 3.3902e-02, PNorm = 71.1450, GNorm = 0.6519, lr_0 = 1.1925e-04
Loss = 3.4812e-02, PNorm = 71.1459, GNorm = 0.6010, lr_0 = 1.1917e-04
Loss = 3.0058e-02, PNorm = 71.1475, GNorm = 0.5736, lr_0 = 1.1909e-04
Loss = 3.6717e-02, PNorm = 71.1490, GNorm = 0.6572, lr_0 = 1.1901e-04
Loss = 3.6212e-02, PNorm = 71.1503, GNorm = 0.9358, lr_0 = 1.1892e-04
Loss = 3.2859e-02, PNorm = 71.1525, GNorm = 0.4529, lr_0 = 1.1884e-04
Loss = 3.3137e-02, PNorm = 71.1536, GNorm = 0.5159, lr_0 = 1.1876e-04
Loss = 3.6803e-02, PNorm = 71.1545, GNorm = 0.4761, lr_0 = 1.1868e-04
Loss = 3.0980e-02, PNorm = 71.1561, GNorm = 0.3867, lr_0 = 1.1860e-04
Loss = 3.1509e-02, PNorm = 71.1580, GNorm = 0.4507, lr_0 = 1.1852e-04
Loss = 2.8263e-02, PNorm = 71.1597, GNorm = 0.6912, lr_0 = 1.1844e-04
Loss = 3.2665e-02, PNorm = 71.1618, GNorm = 0.4670, lr_0 = 1.1835e-04
Loss = 3.2487e-02, PNorm = 71.1631, GNorm = 0.4098, lr_0 = 1.1827e-04
Loss = 3.2094e-02, PNorm = 71.1650, GNorm = 0.5396, lr_0 = 1.1819e-04
Loss = 3.3640e-02, PNorm = 71.1667, GNorm = 0.5230, lr_0 = 1.1811e-04
Loss = 3.9856e-02, PNorm = 71.1681, GNorm = 0.5192, lr_0 = 1.1803e-04
Loss = 3.8924e-02, PNorm = 71.1694, GNorm = 0.6299, lr_0 = 1.1795e-04
Loss = 3.2326e-02, PNorm = 71.1719, GNorm = 0.4330, lr_0 = 1.1787e-04
Validation mae = 0.388924
Epoch 28
Loss = 3.6983e-02, PNorm = 71.1734, GNorm = 0.3488, lr_0 = 1.1779e-04
Loss = 2.6874e-02, PNorm = 71.1745, GNorm = 0.4912, lr_0 = 1.1771e-04
Loss = 2.8101e-02, PNorm = 71.1760, GNorm = 0.4538, lr_0 = 1.1763e-04
Loss = 3.3606e-02, PNorm = 71.1780, GNorm = 0.4832, lr_0 = 1.1755e-04
Loss = 2.5219e-02, PNorm = 71.1804, GNorm = 0.5375, lr_0 = 1.1747e-04
Loss = 3.1352e-02, PNorm = 71.1808, GNorm = 0.5212, lr_0 = 1.1739e-04
Loss = 3.1972e-02, PNorm = 71.1818, GNorm = 0.3917, lr_0 = 1.1730e-04
Loss = 2.9366e-02, PNorm = 71.1835, GNorm = 0.3635, lr_0 = 1.1722e-04
Loss = 2.6634e-02, PNorm = 71.1858, GNorm = 0.5362, lr_0 = 1.1714e-04
Loss = 2.9896e-02, PNorm = 71.1881, GNorm = 0.4602, lr_0 = 1.1706e-04
Loss = 3.2487e-02, PNorm = 71.1903, GNorm = 0.7993, lr_0 = 1.1698e-04
Loss = 3.0486e-02, PNorm = 71.1919, GNorm = 0.4623, lr_0 = 1.1690e-04
Loss = 2.8119e-02, PNorm = 71.1934, GNorm = 0.4117, lr_0 = 1.1682e-04
Loss = 2.5469e-02, PNorm = 71.1950, GNorm = 0.4750, lr_0 = 1.1674e-04
Loss = 2.6454e-02, PNorm = 71.1968, GNorm = 0.4813, lr_0 = 1.1666e-04
Loss = 2.9362e-02, PNorm = 71.1975, GNorm = 0.3478, lr_0 = 1.1658e-04
Loss = 2.8943e-02, PNorm = 71.1985, GNorm = 0.4397, lr_0 = 1.1650e-04
Loss = 3.2139e-02, PNorm = 71.2001, GNorm = 0.4066, lr_0 = 1.1642e-04
Loss = 2.7901e-02, PNorm = 71.2021, GNorm = 0.3760, lr_0 = 1.1634e-04
Loss = 2.9042e-02, PNorm = 71.2034, GNorm = 0.4211, lr_0 = 1.1626e-04
Loss = 2.9077e-02, PNorm = 71.2052, GNorm = 0.3993, lr_0 = 1.1618e-04
Loss = 2.9058e-02, PNorm = 71.2066, GNorm = 0.4705, lr_0 = 1.1611e-04
Loss = 3.5630e-02, PNorm = 71.2096, GNorm = 0.4538, lr_0 = 1.1603e-04
Loss = 3.5821e-02, PNorm = 71.2123, GNorm = 0.8385, lr_0 = 1.1595e-04
Loss = 3.0544e-02, PNorm = 71.2139, GNorm = 0.4661, lr_0 = 1.1587e-04
Loss = 2.8981e-02, PNorm = 71.2151, GNorm = 0.4722, lr_0 = 1.1579e-04
Loss = 2.7352e-02, PNorm = 71.2163, GNorm = 0.4896, lr_0 = 1.1571e-04
Loss = 2.7789e-02, PNorm = 71.2185, GNorm = 0.8173, lr_0 = 1.1563e-04
Loss = 2.9140e-02, PNorm = 71.2203, GNorm = 0.6207, lr_0 = 1.1555e-04
Loss = 2.4963e-02, PNorm = 71.2215, GNorm = 0.3906, lr_0 = 1.1547e-04
Loss = 3.1662e-02, PNorm = 71.2236, GNorm = 0.6133, lr_0 = 1.1539e-04
Loss = 2.8336e-02, PNorm = 71.2254, GNorm = 0.4202, lr_0 = 1.1531e-04
Loss = 3.2624e-02, PNorm = 71.2260, GNorm = 0.4240, lr_0 = 1.1523e-04
Loss = 3.1911e-02, PNorm = 71.2265, GNorm = 0.5471, lr_0 = 1.1515e-04
Loss = 2.9031e-02, PNorm = 71.2276, GNorm = 0.5481, lr_0 = 1.1508e-04
Loss = 3.0975e-02, PNorm = 71.2296, GNorm = 0.6608, lr_0 = 1.1500e-04
Loss = 3.5262e-02, PNorm = 71.2317, GNorm = 0.9439, lr_0 = 1.1492e-04
Loss = 3.0029e-02, PNorm = 71.2326, GNorm = 0.5291, lr_0 = 1.1484e-04
Loss = 3.5736e-02, PNorm = 71.2338, GNorm = 0.7827, lr_0 = 1.1476e-04
Loss = 2.5408e-02, PNorm = 71.2351, GNorm = 0.4849, lr_0 = 1.1468e-04
Loss = 2.9438e-02, PNorm = 71.2357, GNorm = 0.5559, lr_0 = 1.1460e-04
Loss = 2.8849e-02, PNorm = 71.2369, GNorm = 0.3926, lr_0 = 1.1452e-04
Loss = 3.3085e-02, PNorm = 71.2392, GNorm = 0.6493, lr_0 = 1.1445e-04
Loss = 3.2351e-02, PNorm = 71.2410, GNorm = 0.5026, lr_0 = 1.1437e-04
Loss = 2.9848e-02, PNorm = 71.2424, GNorm = 0.6544, lr_0 = 1.1429e-04
Loss = 2.6639e-02, PNorm = 71.2432, GNorm = 0.6163, lr_0 = 1.1421e-04
Loss = 2.8620e-02, PNorm = 71.2449, GNorm = 0.3711, lr_0 = 1.1413e-04
Loss = 3.8289e-02, PNorm = 71.2461, GNorm = 0.4579, lr_0 = 1.1405e-04
Loss = 3.0914e-02, PNorm = 71.2485, GNorm = 0.4328, lr_0 = 1.1398e-04
Loss = 2.9899e-02, PNorm = 71.2496, GNorm = 0.5396, lr_0 = 1.1390e-04
Loss = 2.7186e-02, PNorm = 71.2496, GNorm = 0.4959, lr_0 = 1.1382e-04
Loss = 2.7481e-02, PNorm = 71.2506, GNorm = 0.5452, lr_0 = 1.1374e-04
Loss = 2.8844e-02, PNorm = 71.2520, GNorm = 0.4317, lr_0 = 1.1366e-04
Loss = 3.1016e-02, PNorm = 71.2537, GNorm = 0.6818, lr_0 = 1.1359e-04
Loss = 2.9522e-02, PNorm = 71.2554, GNorm = 0.5356, lr_0 = 1.1351e-04
Loss = 2.6205e-02, PNorm = 71.2568, GNorm = 0.4452, lr_0 = 1.1343e-04
Loss = 3.5430e-02, PNorm = 71.2592, GNorm = 0.4497, lr_0 = 1.1335e-04
Loss = 3.2509e-02, PNorm = 71.2608, GNorm = 0.3280, lr_0 = 1.1328e-04
Loss = 3.3372e-02, PNorm = 71.2624, GNorm = 0.6678, lr_0 = 1.1320e-04
Loss = 2.9703e-02, PNorm = 71.2643, GNorm = 0.4594, lr_0 = 1.1312e-04
Loss = 2.6147e-02, PNorm = 71.2659, GNorm = 0.4656, lr_0 = 1.1304e-04
Loss = 2.8015e-02, PNorm = 71.2679, GNorm = 0.4202, lr_0 = 1.1297e-04
Loss = 3.1709e-02, PNorm = 71.2694, GNorm = 0.4760, lr_0 = 1.1289e-04
Loss = 2.8379e-02, PNorm = 71.2710, GNorm = 0.4199, lr_0 = 1.1281e-04
Loss = 3.2382e-02, PNorm = 71.2728, GNorm = 0.4115, lr_0 = 1.1273e-04
Loss = 3.1080e-02, PNorm = 71.2736, GNorm = 0.4274, lr_0 = 1.1266e-04
Loss = 3.3017e-02, PNorm = 71.2741, GNorm = 0.3203, lr_0 = 1.1258e-04
Loss = 3.2550e-02, PNorm = 71.2756, GNorm = 0.4881, lr_0 = 1.1250e-04
Loss = 3.1253e-02, PNorm = 71.2771, GNorm = 0.6416, lr_0 = 1.1243e-04
Loss = 3.3045e-02, PNorm = 71.2793, GNorm = 0.5440, lr_0 = 1.1235e-04
Loss = 3.3615e-02, PNorm = 71.2805, GNorm = 0.4489, lr_0 = 1.1227e-04
Loss = 3.0871e-02, PNorm = 71.2805, GNorm = 0.3825, lr_0 = 1.1219e-04
Loss = 3.1814e-02, PNorm = 71.2809, GNorm = 0.4488, lr_0 = 1.1212e-04
Loss = 3.3903e-02, PNorm = 71.2829, GNorm = 0.4109, lr_0 = 1.1204e-04
Loss = 2.8453e-02, PNorm = 71.2842, GNorm = 0.4736, lr_0 = 1.1196e-04
Loss = 3.7800e-02, PNorm = 71.2863, GNorm = 0.6489, lr_0 = 1.1189e-04
Loss = 3.7788e-02, PNorm = 71.2874, GNorm = 0.7156, lr_0 = 1.1181e-04
Loss = 2.8131e-02, PNorm = 71.2884, GNorm = 0.4659, lr_0 = 1.1173e-04
Loss = 2.8634e-02, PNorm = 71.2905, GNorm = 0.4464, lr_0 = 1.1166e-04
Loss = 3.5026e-02, PNorm = 71.2918, GNorm = 0.5593, lr_0 = 1.1158e-04
Loss = 3.3004e-02, PNorm = 71.2927, GNorm = 0.6083, lr_0 = 1.1150e-04
Loss = 3.5740e-02, PNorm = 71.2945, GNorm = 0.3154, lr_0 = 1.1143e-04
Loss = 2.7011e-02, PNorm = 71.2958, GNorm = 0.5571, lr_0 = 1.1135e-04
Loss = 3.0371e-02, PNorm = 71.2979, GNorm = 0.4012, lr_0 = 1.1128e-04
Loss = 3.4423e-02, PNorm = 71.2993, GNorm = 0.3900, lr_0 = 1.1120e-04
Loss = 4.0498e-02, PNorm = 71.3006, GNorm = 0.4787, lr_0 = 1.1112e-04
Loss = 3.4778e-02, PNorm = 71.3017, GNorm = 0.3758, lr_0 = 1.1105e-04
Loss = 3.7193e-02, PNorm = 71.3034, GNorm = 0.5320, lr_0 = 1.1097e-04
Loss = 3.1522e-02, PNorm = 71.3053, GNorm = 0.5908, lr_0 = 1.1089e-04
Loss = 3.3938e-02, PNorm = 71.3073, GNorm = 0.4307, lr_0 = 1.1082e-04
Loss = 2.9794e-02, PNorm = 71.3090, GNorm = 0.4613, lr_0 = 1.1074e-04
Loss = 3.0476e-02, PNorm = 71.3115, GNorm = 0.4827, lr_0 = 1.1067e-04
Loss = 3.3972e-02, PNorm = 71.3128, GNorm = 0.4659, lr_0 = 1.1059e-04
Loss = 3.1827e-02, PNorm = 71.3136, GNorm = 0.3860, lr_0 = 1.1052e-04
Loss = 3.4353e-02, PNorm = 71.3145, GNorm = 0.4686, lr_0 = 1.1044e-04
Loss = 3.1180e-02, PNorm = 71.3155, GNorm = 0.5729, lr_0 = 1.1036e-04
Loss = 3.7491e-02, PNorm = 71.3165, GNorm = 0.5968, lr_0 = 1.1029e-04
Loss = 3.2001e-02, PNorm = 71.3183, GNorm = 0.7080, lr_0 = 1.1021e-04
Loss = 3.4009e-02, PNorm = 71.3204, GNorm = 0.4469, lr_0 = 1.1014e-04
Loss = 3.4607e-02, PNorm = 71.3212, GNorm = 0.6893, lr_0 = 1.1006e-04
Loss = 3.0657e-02, PNorm = 71.3236, GNorm = 0.4203, lr_0 = 1.0999e-04
Loss = 3.3153e-02, PNorm = 71.3255, GNorm = 0.4506, lr_0 = 1.0991e-04
Loss = 2.9034e-02, PNorm = 71.3266, GNorm = 0.4765, lr_0 = 1.0984e-04
Loss = 3.2977e-02, PNorm = 71.3276, GNorm = 0.5064, lr_0 = 1.0976e-04
Loss = 3.4677e-02, PNorm = 71.3293, GNorm = 0.3884, lr_0 = 1.0969e-04
Loss = 2.9898e-02, PNorm = 71.3309, GNorm = 0.5426, lr_0 = 1.0961e-04
Loss = 2.9740e-02, PNorm = 71.3313, GNorm = 0.3907, lr_0 = 1.0954e-04
Loss = 2.9488e-02, PNorm = 71.3322, GNorm = 0.6836, lr_0 = 1.0946e-04
Loss = 3.1604e-02, PNorm = 71.3326, GNorm = 0.4441, lr_0 = 1.0939e-04
Loss = 3.2907e-02, PNorm = 71.3333, GNorm = 0.4482, lr_0 = 1.0931e-04
Loss = 3.6423e-02, PNorm = 71.3339, GNorm = 0.4621, lr_0 = 1.0924e-04
Loss = 3.2089e-02, PNorm = 71.3340, GNorm = 0.3922, lr_0 = 1.0916e-04
Loss = 3.2534e-02, PNorm = 71.3353, GNorm = 0.3792, lr_0 = 1.0909e-04
Loss = 3.4012e-02, PNorm = 71.3375, GNorm = 0.4589, lr_0 = 1.0901e-04
Loss = 3.5587e-02, PNorm = 71.3395, GNorm = 0.4222, lr_0 = 1.0894e-04
Loss = 3.3417e-02, PNorm = 71.3410, GNorm = 0.6547, lr_0 = 1.0886e-04
Loss = 3.7643e-02, PNorm = 71.3427, GNorm = 0.6746, lr_0 = 1.0879e-04
Loss = 2.9604e-02, PNorm = 71.3436, GNorm = 0.6989, lr_0 = 1.0871e-04
Loss = 3.5835e-02, PNorm = 71.3441, GNorm = 0.6859, lr_0 = 1.0864e-04
Loss = 3.0130e-02, PNorm = 71.3459, GNorm = 0.4429, lr_0 = 1.0856e-04
Validation mae = 0.388079
Epoch 29
Loss = 2.5701e-02, PNorm = 71.3476, GNorm = 0.3779, lr_0 = 1.0849e-04
Loss = 2.5633e-02, PNorm = 71.3489, GNorm = 0.4300, lr_0 = 1.0841e-04
Loss = 2.4274e-02, PNorm = 71.3497, GNorm = 0.5091, lr_0 = 1.0834e-04
Loss = 2.7256e-02, PNorm = 71.3509, GNorm = 0.4570, lr_0 = 1.0827e-04
Loss = 2.6727e-02, PNorm = 71.3532, GNorm = 0.3342, lr_0 = 1.0819e-04
Loss = 2.7128e-02, PNorm = 71.3545, GNorm = 0.3168, lr_0 = 1.0812e-04
Loss = 2.9435e-02, PNorm = 71.3556, GNorm = 0.4591, lr_0 = 1.0804e-04
Loss = 2.9699e-02, PNorm = 71.3578, GNorm = 0.4989, lr_0 = 1.0797e-04
Loss = 2.6972e-02, PNorm = 71.3598, GNorm = 0.5952, lr_0 = 1.0790e-04
Loss = 2.9189e-02, PNorm = 71.3613, GNorm = 0.4353, lr_0 = 1.0782e-04
Loss = 2.8223e-02, PNorm = 71.3628, GNorm = 0.4172, lr_0 = 1.0775e-04
Loss = 3.0683e-02, PNorm = 71.3644, GNorm = 0.3869, lr_0 = 1.0767e-04
Loss = 3.2760e-02, PNorm = 71.3667, GNorm = 0.6949, lr_0 = 1.0760e-04
Loss = 2.4985e-02, PNorm = 71.3685, GNorm = 0.4711, lr_0 = 1.0753e-04
Loss = 3.3128e-02, PNorm = 71.3707, GNorm = 0.6979, lr_0 = 1.0745e-04
Loss = 2.8627e-02, PNorm = 71.3730, GNorm = 0.3949, lr_0 = 1.0738e-04
Loss = 2.8407e-02, PNorm = 71.3742, GNorm = 0.3691, lr_0 = 1.0731e-04
Loss = 2.8328e-02, PNorm = 71.3754, GNorm = 0.3644, lr_0 = 1.0723e-04
Loss = 2.9959e-02, PNorm = 71.3769, GNorm = 0.4455, lr_0 = 1.0716e-04
Loss = 2.7115e-02, PNorm = 71.3774, GNorm = 0.5520, lr_0 = 1.0709e-04
Loss = 2.8826e-02, PNorm = 71.3789, GNorm = 0.4568, lr_0 = 1.0701e-04
Loss = 2.8792e-02, PNorm = 71.3805, GNorm = 0.4430, lr_0 = 1.0694e-04
Loss = 2.7631e-02, PNorm = 71.3813, GNorm = 0.4469, lr_0 = 1.0687e-04
Loss = 2.5701e-02, PNorm = 71.3834, GNorm = 0.5437, lr_0 = 1.0679e-04
Loss = 3.5873e-02, PNorm = 71.3854, GNorm = 0.4806, lr_0 = 1.0672e-04
Loss = 2.6073e-02, PNorm = 71.3873, GNorm = 0.7738, lr_0 = 1.0665e-04
Loss = 3.1742e-02, PNorm = 71.3891, GNorm = 0.8558, lr_0 = 1.0657e-04
Loss = 3.1978e-02, PNorm = 71.3912, GNorm = 0.6045, lr_0 = 1.0650e-04
Loss = 3.2076e-02, PNorm = 71.3931, GNorm = 0.6663, lr_0 = 1.0643e-04
Loss = 2.9677e-02, PNorm = 71.3946, GNorm = 0.5110, lr_0 = 1.0635e-04
Loss = 3.0309e-02, PNorm = 71.3962, GNorm = 0.6995, lr_0 = 1.0628e-04
Loss = 2.8200e-02, PNorm = 71.3980, GNorm = 0.6449, lr_0 = 1.0621e-04
Loss = 2.5096e-02, PNorm = 71.3992, GNorm = 0.4258, lr_0 = 1.0614e-04
Loss = 2.7005e-02, PNorm = 71.4000, GNorm = 0.4710, lr_0 = 1.0606e-04
Loss = 2.8286e-02, PNorm = 71.4013, GNorm = 0.3714, lr_0 = 1.0599e-04
Loss = 2.9704e-02, PNorm = 71.4030, GNorm = 0.4751, lr_0 = 1.0592e-04
Loss = 3.4452e-02, PNorm = 71.4042, GNorm = 0.4692, lr_0 = 1.0585e-04
Loss = 2.7127e-02, PNorm = 71.4057, GNorm = 0.4316, lr_0 = 1.0577e-04
Loss = 2.9138e-02, PNorm = 71.4072, GNorm = 0.4726, lr_0 = 1.0570e-04
Loss = 3.3040e-02, PNorm = 71.4093, GNorm = 0.7041, lr_0 = 1.0563e-04
Loss = 2.9149e-02, PNorm = 71.4110, GNorm = 0.4217, lr_0 = 1.0556e-04
Loss = 3.4247e-02, PNorm = 71.4133, GNorm = 0.6890, lr_0 = 1.0548e-04
Loss = 3.1417e-02, PNorm = 71.4152, GNorm = 0.5772, lr_0 = 1.0541e-04
Loss = 2.8143e-02, PNorm = 71.4164, GNorm = 0.3562, lr_0 = 1.0534e-04
Loss = 2.9181e-02, PNorm = 71.4173, GNorm = 0.3994, lr_0 = 1.0527e-04
Loss = 2.9457e-02, PNorm = 71.4185, GNorm = 0.4638, lr_0 = 1.0519e-04
Loss = 2.4033e-02, PNorm = 71.4203, GNorm = 0.4155, lr_0 = 1.0512e-04
Loss = 2.6011e-02, PNorm = 71.4218, GNorm = 0.3844, lr_0 = 1.0505e-04
Loss = 2.8839e-02, PNorm = 71.4239, GNorm = 0.3932, lr_0 = 1.0498e-04
Loss = 3.0776e-02, PNorm = 71.4261, GNorm = 0.6305, lr_0 = 1.0491e-04
Loss = 3.5718e-02, PNorm = 71.4279, GNorm = 0.6161, lr_0 = 1.0483e-04
Loss = 3.4922e-02, PNorm = 71.4297, GNorm = 0.4652, lr_0 = 1.0476e-04
Loss = 2.3283e-02, PNorm = 71.4314, GNorm = 0.3707, lr_0 = 1.0469e-04
Loss = 3.0784e-02, PNorm = 71.4332, GNorm = 0.5757, lr_0 = 1.0462e-04
Loss = 2.8360e-02, PNorm = 71.4351, GNorm = 0.5108, lr_0 = 1.0455e-04
Loss = 2.8953e-02, PNorm = 71.4365, GNorm = 0.6455, lr_0 = 1.0448e-04
Loss = 2.8892e-02, PNorm = 71.4374, GNorm = 0.3975, lr_0 = 1.0440e-04
Loss = 3.3810e-02, PNorm = 71.4385, GNorm = 0.5708, lr_0 = 1.0433e-04
Loss = 2.9752e-02, PNorm = 71.4404, GNorm = 0.4033, lr_0 = 1.0426e-04
Loss = 3.2820e-02, PNorm = 71.4412, GNorm = 0.7302, lr_0 = 1.0419e-04
Loss = 2.8938e-02, PNorm = 71.4412, GNorm = 0.4860, lr_0 = 1.0412e-04
Loss = 3.0041e-02, PNorm = 71.4424, GNorm = 0.4385, lr_0 = 1.0405e-04
Loss = 2.7335e-02, PNorm = 71.4431, GNorm = 0.4329, lr_0 = 1.0398e-04
Loss = 3.2599e-02, PNorm = 71.4439, GNorm = 0.4510, lr_0 = 1.0391e-04
Loss = 2.8897e-02, PNorm = 71.4449, GNorm = 0.3753, lr_0 = 1.0383e-04
Loss = 3.0925e-02, PNorm = 71.4463, GNorm = 0.4414, lr_0 = 1.0376e-04
Loss = 2.7469e-02, PNorm = 71.4476, GNorm = 0.4690, lr_0 = 1.0369e-04
Loss = 3.2770e-02, PNorm = 71.4482, GNorm = 0.5146, lr_0 = 1.0362e-04
Loss = 2.4059e-02, PNorm = 71.4492, GNorm = 0.3645, lr_0 = 1.0355e-04
Loss = 3.0115e-02, PNorm = 71.4502, GNorm = 0.3991, lr_0 = 1.0348e-04
Loss = 2.5933e-02, PNorm = 71.4508, GNorm = 0.3812, lr_0 = 1.0341e-04
Loss = 3.1879e-02, PNorm = 71.4521, GNorm = 0.4310, lr_0 = 1.0334e-04
Loss = 3.2445e-02, PNorm = 71.4536, GNorm = 0.5127, lr_0 = 1.0327e-04
Loss = 3.4083e-02, PNorm = 71.4559, GNorm = 0.7576, lr_0 = 1.0320e-04
Loss = 3.0819e-02, PNorm = 71.4570, GNorm = 0.4249, lr_0 = 1.0312e-04
Loss = 3.0681e-02, PNorm = 71.4583, GNorm = 0.4339, lr_0 = 1.0305e-04
Loss = 2.9887e-02, PNorm = 71.4602, GNorm = 0.4055, lr_0 = 1.0298e-04
Loss = 3.0003e-02, PNorm = 71.4610, GNorm = 0.6857, lr_0 = 1.0291e-04
Loss = 3.0671e-02, PNorm = 71.4622, GNorm = 0.4647, lr_0 = 1.0284e-04
Loss = 2.5337e-02, PNorm = 71.4634, GNorm = 0.6760, lr_0 = 1.0277e-04
Loss = 3.0759e-02, PNorm = 71.4648, GNorm = 0.4253, lr_0 = 1.0270e-04
Loss = 2.5846e-02, PNorm = 71.4656, GNorm = 0.4161, lr_0 = 1.0263e-04
Loss = 2.8616e-02, PNorm = 71.4658, GNorm = 0.3897, lr_0 = 1.0256e-04
Loss = 3.0317e-02, PNorm = 71.4667, GNorm = 0.4065, lr_0 = 1.0249e-04
Loss = 2.7639e-02, PNorm = 71.4678, GNorm = 0.4101, lr_0 = 1.0242e-04
Loss = 3.2396e-02, PNorm = 71.4691, GNorm = 0.5591, lr_0 = 1.0235e-04
Loss = 3.0766e-02, PNorm = 71.4708, GNorm = 0.4119, lr_0 = 1.0228e-04
Loss = 2.7079e-02, PNorm = 71.4725, GNorm = 0.3985, lr_0 = 1.0221e-04
Loss = 2.5237e-02, PNorm = 71.4739, GNorm = 0.4072, lr_0 = 1.0214e-04
Loss = 3.5106e-02, PNorm = 71.4750, GNorm = 0.7389, lr_0 = 1.0207e-04
Loss = 2.6864e-02, PNorm = 71.4757, GNorm = 0.4901, lr_0 = 1.0200e-04
Loss = 2.8617e-02, PNorm = 71.4768, GNorm = 0.6051, lr_0 = 1.0193e-04
Loss = 2.8498e-02, PNorm = 71.4782, GNorm = 0.5885, lr_0 = 1.0186e-04
Loss = 3.3743e-02, PNorm = 71.4794, GNorm = 0.6094, lr_0 = 1.0179e-04
Loss = 2.8445e-02, PNorm = 71.4804, GNorm = 0.3991, lr_0 = 1.0172e-04
Loss = 3.0472e-02, PNorm = 71.4807, GNorm = 0.4261, lr_0 = 1.0165e-04
Loss = 2.8484e-02, PNorm = 71.4817, GNorm = 0.4391, lr_0 = 1.0158e-04
Loss = 3.3526e-02, PNorm = 71.4831, GNorm = 0.5845, lr_0 = 1.0151e-04
Loss = 2.8892e-02, PNorm = 71.4838, GNorm = 0.3543, lr_0 = 1.0144e-04
Loss = 2.8746e-02, PNorm = 71.4848, GNorm = 0.6037, lr_0 = 1.0137e-04
Loss = 3.0493e-02, PNorm = 71.4858, GNorm = 0.4563, lr_0 = 1.0130e-04
Loss = 3.1984e-02, PNorm = 71.4872, GNorm = 0.4179, lr_0 = 1.0123e-04
Loss = 3.1288e-02, PNorm = 71.4886, GNorm = 0.6644, lr_0 = 1.0116e-04
Loss = 2.9490e-02, PNorm = 71.4899, GNorm = 0.4700, lr_0 = 1.0110e-04
Loss = 3.8306e-02, PNorm = 71.4910, GNorm = 1.0862, lr_0 = 1.0103e-04
Loss = 3.2192e-02, PNorm = 71.4932, GNorm = 0.6569, lr_0 = 1.0096e-04
Loss = 3.2940e-02, PNorm = 71.4942, GNorm = 0.4762, lr_0 = 1.0089e-04
Loss = 3.5351e-02, PNorm = 71.4950, GNorm = 0.5241, lr_0 = 1.0082e-04
Loss = 2.8191e-02, PNorm = 71.4957, GNorm = 0.6396, lr_0 = 1.0075e-04
Loss = 2.8141e-02, PNorm = 71.4969, GNorm = 0.4221, lr_0 = 1.0068e-04
Loss = 3.7008e-02, PNorm = 71.4975, GNorm = 0.4410, lr_0 = 1.0061e-04
Loss = 3.3718e-02, PNorm = 71.4990, GNorm = 0.4753, lr_0 = 1.0054e-04
Loss = 3.4874e-02, PNorm = 71.5009, GNorm = 0.6662, lr_0 = 1.0047e-04
Loss = 3.2912e-02, PNorm = 71.5017, GNorm = 0.7904, lr_0 = 1.0041e-04
Loss = 2.7356e-02, PNorm = 71.5026, GNorm = 0.5017, lr_0 = 1.0034e-04
Loss = 3.5530e-02, PNorm = 71.5038, GNorm = 0.5327, lr_0 = 1.0027e-04
Loss = 2.9119e-02, PNorm = 71.5047, GNorm = 0.4142, lr_0 = 1.0020e-04
Loss = 3.4509e-02, PNorm = 71.5051, GNorm = 0.5364, lr_0 = 1.0013e-04
Loss = 3.5514e-02, PNorm = 71.5065, GNorm = 0.6280, lr_0 = 1.0006e-04
Loss = 3.1481e-02, PNorm = 71.5079, GNorm = 0.5759, lr_0 = 1.0000e-04
Validation mae = 0.389549
Model 0 best validation mae = 0.384569 on epoch 22
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test mae = 0.383000
Ensemble test mae = 0.383000
10-fold cross validation
	Seed 0 ==> test mae = 0.380772
	Seed 1 ==> test mae = 0.381854
	Seed 2 ==> test mae = 0.377298
	Seed 3 ==> test mae = 0.381340
	Seed 4 ==> test mae = 0.383287
	Seed 5 ==> test mae = 0.378214
	Seed 6 ==> test mae = 0.380300
	Seed 7 ==> test mae = 0.381892
	Seed 8 ==> test mae = 0.380340
	Seed 9 ==> test mae = 0.383000
Overall test mae = 0.380830 +/- 0.001817
Elapsed time = 2:41:42
